annotate multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl @ 0:275433d3a395 draft

Uploaded tool tarball.
author devteam
date Wed, 25 Sep 2013 11:26:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/perl
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2 use strict;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3 use warnings;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4 use Term::ANSIColor;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5 use File::Basename;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
6 use IO::Handle;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
7 use Cwd;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
8 use File::Path;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
9 use vars qw($distance @thresholds @tags $species_set @allspecies $printer $treeSpeciesNum $focalspec $mergestarts $mergeends $mergemicros $interrtypecord $microscanned $interrcord $interr_poscord $no_of_interruptionscord $infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species $gapcord $prinkter);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
10 use File::Path qw(make_path remove_tree);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
11 use File::Temp qw/ tempfile tempdir /;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
12 my $tdir = tempdir( CLEANUP => 1 );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
13 chdir $tdir;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
14 my $dir = getcwd;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
15 #print "dir = $dir\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
16
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
17 #$ENV{'PATH'} .= ':' . dirname($0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
18 my $date = `date`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
19
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
20 my ($mafile, $mafile_sputt, $orthfile, $threshold_array, $allspeciesin, $tree_definition_all, $separation) = @ARGV;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
21 if (!$mafile or !$mafile_sputt or !$orthfile or !$threshold_array or !$separation or !$tree_definition_all or !$allspeciesin) { die "missing arguments\n"; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
22
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
23 $tree_definition_all =~ s/\s+//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
24 $threshold_array =~ s/\s+//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
25 $allspeciesin =~ s/\s+//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
26 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
27 # WHICH SPUTNIK USED?
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
28 my $sputnikpath = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
29 $sputnikpath = "sputnik_lowthresh_MATCH_MIN_SCORE3" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
30 #$sputnikpath = "/Users/ydk/work/rhesus_microsat/codes/./sputnik_Mac-PowerPC";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
31 #print "sputnik_Mac-PowerPC non-existant\n" if !-e $sputnikpath;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
32 #exit if !-e $sputnikpath;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
33 #$sputnikpath = "bx-sputnik" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
34 #print "ARGV input = @ARGV\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
35 #print "ARGV input :\n mafile=$mafile\n orthfile=$orthfile\n threshold_array=$threshold_array\n species_set=$species_set\n tree_definition=$tree_definition\n separation=$separation\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
36 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
37 # RUNFILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
38 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
39 $distance = 1; #bp
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
40 $distance++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
41 my @tree_definitions=MakeTrees($tree_definition_all);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
42 my $allspeciesset = $tree_definition_all;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
43 $allspeciesset =~ s/[\(\) ]+//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
44 @allspecies = split(/,/,$allspeciesset);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
45
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
46 my @outputfiles = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
47 my $round = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
48 #my $tdir = tempdir( CLEANUP => 0 );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
49 #chdir $tdir;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
50
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
51 foreach my $tree_definition (@tree_definitions){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
52 my @commas = ($tree_definition =~ /,/g) ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
53 #print "commas = @commas\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
54 next if scalar(@commas) <= 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
55 #print "species_set = $species_set\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
56 $treeSpeciesNum = scalar(@commas) + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
57 $species_set = $tree_definition;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
58 $species_set =~ s/[\)\( ;]+//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
59 #print "species_set = $species_set\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
60
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
61 $round++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
62 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
63 # MICROSATELLITE THRESHOLD SETTINGS (LENGTH, BP)
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
64 $threshold_array=~ s/,/_/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
65 my @thresharr = split("_",$threshold_array);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
66 @thresholds=@thresharr;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
67 #my $threshold_array = join("_",($mono_threshold, $di_threshold, $tri_threshold, $tetra_threshold));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
68 #print "current dit=$dir\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
69 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
70 # CREATE AXT FILES IN FORWARD AND REVERSE ORDERS IF NECESSARY
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
71 my @chrfiles=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
72
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
73 #my $mafile = "/Users/ydk/work/rhesus_microsat/results/galay/align.txt"; #$ARGV[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
74 my $chromt=int(rand(10000));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
75 my $p_chr=$chromt;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
76
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
77
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
78 my @exactspeciesset_unarranged = split(/,/,$species_set);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
79 $tree_definition=~s/[\)\(, ]/\t/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
80 my @treespecies=split(/\t+/,$tree_definition);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
81 my @exactspecies=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
82
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
83 foreach my $spec (@treespecies){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
84 foreach my $espec (@exactspeciesset_unarranged){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
85 push @exactspecies, $spec if $spec eq $espec;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
86 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
87 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
88 #print "exactspecies=@exactspecies\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
89 $focalspec = $exactspecies[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
90 my $arranged_species_set=join(".",@exactspecies);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
91 my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
92 my $chr_name_sputt = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt_sputt");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
93 #print "sending to maftoAxt_multispecies: $mafile, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
94 maftoAxt_multispecies($mafile, $tree_definition, $chr_name, $species_set);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
95 maftoAxt_multispecies($mafile_sputt, $tree_definition, $chr_name_sputt, $species_set);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
96 #print "done maf to axt conversion\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
97 my $reverse_chr_name = join(".",("chr".$p_chr."r"),$arranged_species_set, "net", "axt");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
98 artificial_axdata_inverter ($chr_name, $reverse_chr_name);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
99 #print "reverse_chr_name=$reverse_chr_name\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
100 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
101 # FIND THE CORRESPONDING CHIMP CHROMOSOME FROM FILE ORTp_chrS.TXT
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
102 foreach my $direct ("reverse_direction","forward_direction"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
103 $p_chr=$chromt;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
104 #print "direction = $direct\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
105 $p_chr = $p_chr."r" if $direct eq "reverse_direction";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
106 $p_chr = $p_chr if $direct eq "forward_direction";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
107 my $config = $species_set;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
108 $config=~s/,/./g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
109 my @orgs = split(/\./,$arranged_species_set);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
110 #print "ORGS= @orgs\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
111 my @tag=@orgs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
112
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
113
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
114 my $tags = join(",", @tag);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
115 my @tags=@tag;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
116 chomp $p_chr;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
117 $tags = join("_", split(/,/, $tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
118 my $pchr = "chr".$p_chr;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
119
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
120 my $ptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1])."-".$threshold_array;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
121 my @sp_tags = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
122
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
123 # print "$ptag _ orthfile\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
124 #print "orgs=@orgs, pchr=$pchr, hence, ptag = $ptag\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
125 foreach my $sp (@tag){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
126 push(@sp_tags, ($sp.".".$ptag));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
127 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
128
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
129 my $preptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
130 my @presp_tags = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
131
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
132 foreach my $sp (@tag){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
133 push(@presp_tags, ($sp.".".$preptag));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
134 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
135
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
136 my $resultdir = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
137 my $orthdir = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
138 my $filtereddir = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
139 my $pipedir = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
140
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
141 my @title_queries = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
142 push(@title_queries, "^[0-9]+");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
143 my $sep="\\s";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
144 for my $or (0 ... $#orgs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
145 my $title = join($sep, ($orgs[$or], "[A-Za-z_]+[0-9a-zA-Z]+", "[0-9]+", "[0-9]+", "[\\-\\+]"));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
146 #$title =~ s/chr\\+\\s+\+/chr/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
147 push(@title_queries, $title);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
148 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
149 my $title_query = join($sep, @title_queries);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
150 #print "title_queries=@title_queries\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
151 #print "query = >$title_query<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
152 #print "orgs = @orgs\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
153 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
154 # GET AXTNET FILES, EDIT THEM AND SPLIT THEM INTO HUMAN AND CHIMP INPUT FILES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
155 my $t1input = $pchr.".".$arranged_species_set.".net.axt";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
156
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
157 my @t1outputs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
158
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
159 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
160 push(@t1outputs, $sp."_gap_op");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
161 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
162
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
163
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
164
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
165 multi_species_t1($t1input,$tags,(join(",", @t1outputs)), $title_query);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
166 #print "t1outputs=@t1outputs\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
167 #print "done t1\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
168 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
169 #START T2.PL
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
170
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
171 my $stag = (); my $tag1 = (); my $tag2 = (); my $schrs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
172
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
173 for my $t (0 ... scalar(@tags)-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
174 multi_species_t2($t1outputs[$t], $tag[$t]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
175 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
176 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
177 #START T2.2.PL
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
178
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
179 my @temp_tags = @tag;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
180
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
181 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
182 my $t2input = $sp."_nogap_op_unrand";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
183 multi_species_t2_2($t2input, shift(@temp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
184 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
185 undef (@temp_tags);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
186
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
187 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
188 #START SPUTNIK
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
189
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
190 my @jobIDs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
191 @temp_tags = @tag;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
192 my @sput_filelist = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
193
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
194 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
195 #print "sp = $sp\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
196 my $sputnikoutput = $pipedir.$sp."_sput_op0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
197 my $sputnikinput = $pipedir.$sp."_nogap_op_unrand";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
198 push(@sput_filelist, $sputnikinput);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
199 my $sputnikcommand = $sputnikpath." ".$sputnikinput." > ".$sputnikoutput;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
200 # print "$sputnikcommand\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
201 my @sputnikcommand_system = $sputnikcommand;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
202 system(@sputnikcommand_system);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
203 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
204
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
205 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
206 #START SPUTNIK OUTPUT CORRECTOR
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
207
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
208 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
209 my $corroutput = $pipedir.$sp."_sput_op1";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
210 my $corrinput = $pipedir.$sp."_sput_op0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
211 sputnikoutput_corrector($corrinput,$corroutput);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
212
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
213 my $t4output = $pipedir.$sp."_sput_op2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
214 multi_species_t4($corroutput,$t4output);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
215
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
216 my $t5output = $pipedir.$sp."_sput_op3";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
217 multi_species_t5($t4output,$t5output);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
218 #print "done t5.pl for $sp\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
219
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
220 my $t6output = $pipedir.$sp."_sput_op4";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
221 multi_species_t6($t5output,$t6output,scalar(@orgs));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
222 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
223 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
224 #START T9.PL FOR T10.PL AND FOR INTERRUPTED HUNTING
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
225
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
226 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
227 my $t9output = $pipedir.$sp."_gap_op_unrand_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
228 my $t9sequence = $pipedir.$sp."_gap_op_unrand2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
229 my $t9micro = $pipedir.$sp."_sput_op4";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
230 t9($t9micro,$t9sequence,$t9output);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
231
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
232 my $t9output2 = $pipedir.$sp."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
233 my $t9sequence2 = $pipedir.$sp."_nogap_op_unrand2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
234 t9($t9micro,$t9sequence2,$t9output2);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
235 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
236 #print "done both t9.pl for all orgs\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
237
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
238 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
239 # FIND COMPOUND MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
240
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
241 @jobIDs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
242 my $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
243
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
244 foreach my $sp (@presp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
245 my $simple_microsats=$pipedir.$sp."_sput_op4_simple";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
246 my $compound_microsats=$pipedir.$sp."_sput_op4_compound";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
247 my $input_micro = $pipedir.$sp."_sput_op4";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
248 my $input_seq = $pipedir.$sp."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
249 multiSpecies_compound_microsat_hunter3($input_micro,$input_seq,$simple_microsats,$compound_microsats,$orgs[$species_counter], scalar(@sp_tags), $threshold_array );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
250 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
251 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
252
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
253 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
254 # READING AND FILTERING SIMPLE MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
255 my $spcounter2=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
256 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
257 my $presp = $presp_tags[$spcounter2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
258 $spcounter2++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
259 my $simple_microsats=$pipedir.$presp."_sput_op4_simple";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
260 my $simple_filterout = $pipedir.$sp."_sput_op4_simple_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
261 my $simple_residue = $pipedir.$sp."_sput_op4_simple_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
262 multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
263 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
264
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
265 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
266 # ANALYZE COMPOUND MICROSATELLITES FOR BEING INTERRUPTED MICROSATS
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
267
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
268 $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
269 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
270 my $presp = $presp_tags[$species_counter];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
271 my $compound_microsats = $pipedir.$presp."_sput_op4_compound";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
272 my $analyzed_simple_microsats=$pipedir.$presp."_sput_op4_compound_interrupted";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
273 my $analyzed_compound_microsats=$pipedir.$presp."_sput_op4_compound_pure";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
274 my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
275 multiSpecies_compound_microsat_analyzer($compound_microsats,$seq_file,$analyzed_simple_microsats,$analyzed_compound_microsats,$orgs[$species_counter], scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
276 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
277 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
278 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
279 # REANALYZE COMPOUND MICROSATELLITES FOR PRESENCE OF SIMPLE ONES WITHIN THEM..
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
280 $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
281
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
282 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
283 my $presp = $presp_tags[$species_counter];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
284 my $compound_microsats = $pipedir.$presp."_sput_op4_compound_pure";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
285 my $compound_interrupted = $pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
286 my $compound_compound = $pipedir.$presp."_sput_op4_compound_compound";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
287 my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
288 multiSpecies_compoundClarifyer($compound_microsats,$seq_file,$compound_interrupted,$compound_compound,$orgs[$species_counter], scalar(@sp_tags), "2_4_6_8", "3_4_6_8", "2_4_6_8");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
289 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
290 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
291 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
292 # READING AND FILTERING SIMPLE AND COMPOUND MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
293 $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
294
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
295 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
296 my $presp = $presp_tags[$species_counter];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
297
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
298 my $simple_microsats=$pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
299 my $simple_filterout = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
300 my $simple_residue = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
301 multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
302
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
303 my $simple_microsats2 = $pipedir.$presp."_sput_op4_compound_interrupted";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
304 my $simple_filterout2 = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
305 my $simple_residue2 = $pipedir.$sp."_sput_op4_compound_interrupted_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
306 multiSpecies_filtering_interrupted_microsats($simple_microsats2, $simple_filterout2, $simple_residue2,$threshold_array,$threshold_array,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
307
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
308 my $compound_microsats=$pipedir.$presp."_sput_op4_compound_compound";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
309 my $compound_filterout = $pipedir.$sp."_sput_op4_compound_compound_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
310 my $compound_residue = $pipedir.$sp."_sput_op4_compound_compound_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
311 multispecies_filtering_compound_microsats($compound_microsats, $compound_filterout, $compound_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
312 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
313 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
314 #print "done filtering both simple and compound microsatellites \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
315
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
316 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
317
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
318 my @combinedarray = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
319 my @combinedarray_indicators = ("mononucleotide", "dinucleotide", "trinucleotide", "tetranucleotide");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
320 my @combinedarray_tags = ("mono", "di", "tri", "tetra");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
321 $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
322
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
323 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
324 my $simple_interrupted = $pipedir.$sp."_simple_analyzed_simple";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
325 push @{$combinedarray[$species_counter]}, $pipedir.$sp."_simple_analyzed_simple_mono", $pipedir.$sp."_simple_analyzed_simple_di", $pipedir.$sp."_simple_analyzed_simple_tri", $pipedir.$sp."_simple_analyzed_simple_tetra";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
326 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
327 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
328
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
329 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
330 # PUT TOGETHER THE INTERRUPTED AND SIMPLE MICROSATELLITES BASED ON THEIR MOTIF SIZE FOR FURTHER EXTENTION
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
331 my $sp_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
332 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
333 my $analyzed_simple = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
334 my $clarifyed_simple = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
335 my $simple = $pipedir.$sp."_sput_op4_simple_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
336 my $simple_analyzed_simple = $pipedir.$sp."_simple_analyzed_simple";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
337 `cat $analyzed_simple $clarifyed_simple $simple > $simple_analyzed_simple`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
338 for my $i (0 ... 3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
339 `grep "$combinedarray_indicators[$i]" $simple_analyzed_simple > $combinedarray[$sp_counter][$i]`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
340 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
341 $sp_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
342 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
343 #print "\ndone grouping interrupted & simple microsats based on their motif size for further extention\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
344
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
345 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
346 # BREAK CHROMOSOME INTO PARTS OF CERTAIN NO. CONTIGS EACH, FOR FUTURE SEARCHING OF INTERRUPTED MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
347 # ESPECIALLY DI, TRI AND TETRANUCLEOTIDE MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
348 @temp_tags = @sp_tags;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
349 my $increment = 1000000;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
350 my @splist = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
351 my $targetdir = $pipedir;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
352 $species_counter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
353
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
354 foreach my $sp (@sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
355 my $presp = $presp_tags[$species_counter];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
356 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
357 my $localtag = shift @temp_tags;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
358 my $locallist = $targetdir.$localtag."_".$p_chr."_list";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
359 push(@splist, $locallist);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
360 my $input = $pipedir.$presp."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
361 chromosome_unrand_breaker($input,$targetdir,$locallist,$increment, $localtag, $pchr);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
362 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
363
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
364
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
365 my @unionarray = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
366 #print "splist=@splist\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
367 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
368 # FIND INTERRUPTED MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
369
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
370 $species_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
371
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
372 for my $i (0 .. $#combinedarray){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
373
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
374 @jobIDs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
375 open (JLIST1, "$splist[$i]") or die "Cannot open file $splist[$i]: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
376
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
377 while (my $sp1 = <JLIST1>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
378 #print "$splist[$i]: sp1=$sp1\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
379 chomp $sp1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
380
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
381 for my $j (0 ... $#combinedarray_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
382 my $interr = $sp1."_interr_".$combinedarray_tags[$j];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
383 my $simple = $sp1."_simple_".$combinedarray_tags[$j];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
384 push @{$unionarray[$i]}, $interr, $simple;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
385 multiSpecies_interruptedMicrosatHunter($combinedarray[$i][$j],$sp1,$interr ,$simple, $orgs[$species_counter], scalar(@sp_tags), "3_4_6_8");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
386 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
387 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
388 $species_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
389 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
390 close JLIST1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
391 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
392 # REUNION AND ZIPPING BEFORE T10.PL
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
393
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
394 my @allarray = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
395
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
396 for my $i (0 ... $#sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
397 my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
398 unlink $localfile if -e $localfile;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
399 push(@allarray, $localfile);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
400
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
401 my $unfiltered_localfile= $localfile."_unfiltered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
402 my $residue_localfile= $localfile."_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
403
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
404 unlink $unfiltered_localfile;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
405 #unlink $unfiltered_localfile;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
406 for my $j (0 ... $#{$unionarray[$i]}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
407 #print "listing files for species $i and list number $j= \n$unionarray[$i][$j] \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
408 `cat $unionarray[$i][$j] >> $unfiltered_localfile`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
409 unlink $unionarray[$i][$j];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
410 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
411
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
412 multiSpecies_filtering_interrupted_microsats($unfiltered_localfile, $localfile, $residue_localfile,$threshold_array,$threshold_array,scalar(@sp_tags) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
413 my $analyzed_compound = $pipedir.$sp_tags[$i]."_sput_op4_compound_compound_filtered";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
414 my $simple_residue = $pipedir.$sp_tags[$i]."_sput_op4_simple_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
415 my $compound_residue = $pipedir.$sp_tags[$i]."_sput_op4_compound_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
416
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
417 `cat $analyzed_compound >> $localfile`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
418 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
419 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
420 # MERGING MICROSATELLITES THAT ARE VERY CLOSE TO EACH OTHER, INCLUDING THOSE FOUND BY SEARCHING IN 2 OPPOSIT DIRECTIONS
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
421
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
422 my $toescape=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
423
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
424
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
425 for my $i (0 ... $#sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
426 my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
427 $localfile =~ /$focalspec\-(chr[0-9a-zA-Z]+)\./;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
428 my $direction = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
429 #print "localfile = $localfile , direction = $direction\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
430 # `gzip $reverse_chr_name` if $direction =~ /chr[0-9a-zA-Z]+r/ && $switchboard{"deleting_processFiles"} != 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
431 $toescape =1 if $direction =~ /chr[0-9a-zA-Z]+r/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
432 last if $direction =~ /chr[0-9a-zA-Z]+r/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
433 my $nogap_sequence = $pipedir.$presp_tags[$i]."_nogap_op_unrand2_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
434 my $gap_sequence = $pipedir.$presp_tags[$i]."_gap_op_unrand_match";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
435 my $reverselocal = $localfile;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
436 $reverselocal =~ s/\-chr([0-9a-zA-Z]+)\./-chr$1r./g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
437 merge_interruptedMicrosats($nogap_sequence,$localfile, $reverselocal ,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
438 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
439 my $forward_separate = $localfile."_separate";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
440 my $reverse_separate = $reverselocal."_separate";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
441 my $diff = $forward_separate."_diff";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
442 my $miss = $forward_separate."_miss";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
443 my $common = $forward_separate."_common";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
444 forward_reverse_sputoutput_comparer($nogap_sequence,$forward_separate, $reverse_separate, $diff, $miss, $common ,scalar(@sp_tags));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
445 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
446 my $symmetrical_file = $localfile."_symmetrical";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
447 my $merged_file = $localfile."_merged";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
448 #print "cating: $merged_file $common into -> $symmetrical_file \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
449 `cat $merged_file $common > $symmetrical_file`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
450 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
451 my $t10output = $symmetrical_file."_fin_hit_all_2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
452 new_multispecies_t10($gap_sequence, $symmetrical_file, $t10output, join(".", @orgs));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
453 #-------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
454 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
455 next if $toescape == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
456 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
457 # BRINGING IT ALL TOGETHER: FINDING ORTHOLOGOUS MICROSATELLITES AMONG THE SPECIES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
458
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
459
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
460 my @micros_array = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
461 my $sampletag = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
462 for my $i (0 ... $#sp_tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
463 my $finhitFile = $pipedir.$sp_tags[$i]."_allmicrosats_symmetrical_fin_hit_all_2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
464 push(@micros_array, $finhitFile);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
465 $sampletag = $sp_tags[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
466 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
467 #$sampletag =~ s/^([A-Z]+\.)/ORTH_/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
468 #$sampletag = $sampletag."_monoThresh-".$mono_threshold."bp";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
469 my $orthfiletemp = $ptag."_orthfile";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
470 my $orthanswer = multiSpecies_orthFinder4($t1input, join(":",@micros_array), $orthfiletemp, join(":", @orgs), $separation);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
471
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
472 my $maskedorthfiletemp = $ptag."_orthfile_masked";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
473 qualityFilter ($orthfiletemp, $chr_name_sputt, $maskedorthfiletemp);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
474
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
475 push @outputfiles , $maskedorthfiletemp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
476 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
477 $date = `date`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
478 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
479
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
480 `cat @outputfiles > $orthfile`;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
481
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
482 my $rootdir = $dir;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
483 $rootdir =~ s/\/[A-Za-z0-9\-_]+$//;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
484 chdir $rootdir;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
485 remove_tree($dir);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
486
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
487 #print "date = $date\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
488 #remove_tree($tdir);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
489 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
490 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
491 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
492 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
493
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
494 #xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
495
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
496 sub maftoAxt_multispecies {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
497 #print "in maftoAxt_multispecies : got @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
498 my $fname=$_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
499 open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
500 my $treedefinition = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
501 open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
502 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
503 my $exactspeciesset = $_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
504 my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
505
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
506 $treedefinition=~s/[\)\(, ]/\t/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
507 my @species=split(/\t+/,$treedefinition);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
508 my @exactspecies=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
509
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
510 foreach my $spec (@species){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
511 foreach my $espec (@exactspeciesset_unarranged){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
512 push @exactspecies, $spec if $spec eq $espec;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
513 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
514 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
515 #print "exactspecies=@exactspecies\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
516
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
517 ###########
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
518 my $select = 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
519 #select = 1 if all species need sequences to be present for each block otherwise, it is 0
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
520 #select = 2 only the allowed set make up the alignment. use the removeset
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
521 # information to detect alignmenets that have other important genomes aligned.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
522 ###########
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
523 my @allowedset = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
524 @allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
525 @allowedset = join("_",0,@species) if $select == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
526 #print "species = @species , allowedset =",join("\n", @allowedset) ," \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
527 @allowedset = join("_",0,@exactspecies) if $select == 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
528 #print "allowedset = @allowedset and exactspecies = @exactspecies\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
529
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
530 my $start = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
531 my @sequences = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
532 my @titles = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
533 my $species_counter = "0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
534 my $countermatch = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
535 my $outsideSpecies=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
536
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
537 while(my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
538 # print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
539 next if $line =~ /^#/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
540 next if $line =~ /^i/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
541 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
542 my @fields = split(/\s+/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
543 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
544 if ($line =~ /^a /){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
545 $start = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
546 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
547
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
548 if ($line =~ /^s /){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
549
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
550 foreach my $sp (@allspecies){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
551 # print "checking species $sp\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
552 if ($fields[1] =~ /$sp/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
553 $species_counter = $species_counter."_".$sp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
554 push(@sequences, $fields[6]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
555 my @sp_info = split(/\./,$fields[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
556 my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
557 push(@titles, $title);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
558 # print "species_counter = $species_counter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
559 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
560 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
561 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
562
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
563 if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start = 1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
564 # print "species_counter = $species_counter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
565 my $arranged = reorderSpecies($species_counter, @allspecies);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
566 my $stopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
567 my $arrno = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
568
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
569 # print "checking if ", scalar(@sequences), " match @exactspecies allowedset=@allowedset\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
570 if (scalar(@sequences) == scalar(@exactspecies)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
571 foreach my $set (@allowedset){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
572 # print "testing $arranged against $set\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
573 if ($arranged eq $set){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
574 $stopper = 0; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
575 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
576 $arrno++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
577 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
578 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
579 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
580 $stopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
581 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
582
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
583
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
584 if ($stopper == 0) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
585 @titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
586 @sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
587 my $filteredseq = filter_gaps(@sequences);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
588
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
589 if ($filteredseq ne "SHORT"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
590 #print "printing"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
591 $counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
592 print OUT join (" ",$counter, @titles), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
593 print OUT $filteredseq, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
594 print OUT "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
595 $countermatch++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
596 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
597 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
598 else{ #print "nexting\n";<STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
599 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
600
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
601 @sequences = (); @titles = (); $start = 0;$species_counter = "0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
602 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
603
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
604 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
605 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
606 # print "countermatch = $countermatch\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
607 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
608
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
609 sub reorderSpecies{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
610 my @inarr=@_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
611 my $currSpecies = shift (@inarr);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
612 my $ordered_species = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
613 my @species=@inarr;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
614 #print "species = @species\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
615 foreach my $order (@species){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
616 $ordered_species = $ordered_species."_".$order if $currSpecies=~ /$order/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
617 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
618 return $ordered_species;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
619
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
620 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
621
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
622 sub filter_gaps{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
623 my @sequences = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
624 # print "sequences sent are @sequences\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
625 my $seq_length = length($sequences[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
626 my $seq_no = scalar(@sequences);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
627 my $allgaps = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
628 for (1 ... $seq_no){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
629 $allgaps = $allgaps."-";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
630 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
631
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
632 my @seq_array = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
633 my $seq_counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
634 foreach my $seq (@sequences){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
635 # my @sequence = split(/\s*/,$seq);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
636 $seq_array[$seq_counter] = [split(/\s*/,$seq)];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
637 # push @seq_array, [@sequence];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
638 $seq_counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
639 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
640 my $g = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
641 while ( $g < $seq_length){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
642 last if (!exists $seq_array[0][$g]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
643 my $bases = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
644 for my $u (0 ... $#seq_array){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
645 $bases = $bases.$seq_array[$u][$g];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
646 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
647 # print $bases, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
648 if ($bases eq $allgaps){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
649 # print "bases are $bases, position is $g \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
650 for my $seq (@seq_array){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
651 splice(@$seq , $g, 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
652 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
653 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
654 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
655 $g++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
656 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
657 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
658
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
659 my @outs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
660
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
661 foreach my $seq (@seq_array){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
662 push(@outs, join("",@$seq));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
663 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
664 return "SHORT" if length($outs[0]) <=100;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
665 return (join("\n", @outs));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
666 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
667
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
668
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
669 sub allowedSetOfSpecies{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
670 my @allowed_species = split(/_/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
671 unshift @allowed_species, 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
672 # print "allowed set = @allowed_species \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
673 my @output = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
674 for (0 ... scalar(@allowed_species) - 4){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
675 push(@output, join("_",@allowed_species));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
676 pop @allowed_species;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
677 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
678 return join(";",reverse(@output));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
679
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
680 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
681
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
682
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
683 sub orderInfo{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
684 my @info = split(/;/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
685 # print "info = @info";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
686 my @old = split(/_/,$_[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
687 my @new = split(/_/,$_[2]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
688 shift @old; shift @new;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
689 my @outinfo = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
690 foreach my $spe (@new){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
691 for my $no (0 ... $#old){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
692 if ($spe eq $old[$no]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
693 push(@outinfo, $info[$no]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
694 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
695 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
696 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
697 # print "outinfo = @outinfo \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
698 return join(";", @outinfo);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
699 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
700
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
701 #xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
702
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
703 #xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
704 sub artificial_axdata_inverter{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
705 open(IN,"<$_[0]") or die "Cannot open file $_[0]: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
706 open(OUT,">$_[1]") or die "Cannot open file $_[1]: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
707 my $linecounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
708 while (my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
709 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
710 #print "$linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
711 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
712 my $final_line = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
713 my $trycounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
714 if ($line =~ /^[a-zA-Z\-]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
715 # while ($final_line eq $line){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
716 my @fields = split(/\s*/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
717
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
718 $final_line = join("",reverse(@fields));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
719 # print colored ['red'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
720 # $trycounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
721 # print "trying again....$trycounter : $final_line\n" if $final_line eq $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
722 # }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
723 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
724
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
725 # print colored ['yellow'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
726 if ($line =~ /^[0-9]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
727 $line =~ s/chr([A-Z0-9a-b]+)/chr$1r/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
728 $final_line = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
729 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
730 print OUT $final_line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
731 #print "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
732 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
733 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
734 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
735 #xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
736
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
737
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
738 #xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
739
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
740 sub multi_species_t1 {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
741
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
742 my $input1 = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
743 #print "@_\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
744 my @tags = split(/_/, $_[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
745 my @outputs = split(/,/, $_[2]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
746 my $title_query = $_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
747 my @handles = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
748
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
749 open(FILEB,"<$input1")or die "Cannot open file: $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
750 my $i = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
751 foreach my $path (@outputs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
752 $handles[$i] = IO::Handle->new();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
753 open ($handles[$i], ">$path") or die "Can't open $path : $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
754 $i++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
755 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
756
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
757 my $curdef;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
758 my $start = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
759
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
760 while (my $line = <FILEB> ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
761 if ($line =~ /^\d/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
762 $line =~ s/ +/\t/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
763 my @fields = split(/\s+/, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
764 if (($line =~ /$title_query/)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
765 my $title = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
766 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
767 foreach my $tag (@tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
768 $line = <FILEB>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
769 print {$handles[$counter]} ">",$tag,"\t",$title, " ",$line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
770 $counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
771 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
772 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
773 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
774 foreach my $tag (@tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
775 my $tine = <FILEB>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
776 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
777 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
778
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
779 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
780 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
781
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
782 foreach my $hand (@handles){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
783 $hand->close();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
784 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
785
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
786 close FILEB;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
787 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
788
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
789 #xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
790
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
791 #xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
792
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
793 sub multi_species_t2{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
794
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
795 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
796 my $species = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
797 my $output1 = $input."_unr";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
798
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
799 #------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
800 open (FILEF1, "<$input") or die "Cannot open file $input :$!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
801 open (FILEF2, ">$output1") or die "Cannot open file $output1 :$!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
802
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
803 my $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
804
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
805 while($line1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
806 {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
807 # chomp($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
808 if ($line1 =~ (m/^\>$species/)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
809 chomp($line1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
810 print FILEF2 $line1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
811 $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
812 chomp($line1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
813 print FILEF2 "\t", $line1,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
814 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
815 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
816 $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
817 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
818
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
819 close FILEF1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
820 close FILEF2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
821 #------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
822
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
823 my $output2 = $output1."and";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
824 my $output3 = $output1."and2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
825 open(IN,"<$output1");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
826 open (FILEF3, ">$output2");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
827 open (FILEF4, ">$output3");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
828
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
829
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
830 while (<IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
831 my $line = $_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
832 chomp($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
833 my @fields=split (/\t/, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
834 # print $line,"\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
835 if($line !~ /random/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
836 print FILEF3 join ("\t",@fields[0 ... scalar(@fields)-2]), "\n", $fields[scalar(@fields)-1], "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
837 print FILEF4 join ("\t",@fields[0 ... scalar(@fields)-2]), "\t", $fields[scalar(@fields)-1], "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
838 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
839 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
840
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
841
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
842 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
843 close FILEF3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
844 close FILEF4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
845 unlink $output1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
846
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
847 #------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
848 # OLD T3.PL RUDIMENT
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
849
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
850 my $t3output = $output2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
851 $t3output =~ s/gap_op_unrand/nogap_op_unrand/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
852
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
853 open(IN,"<$output2");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
854 open(OUTA,">$t3output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
855
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
856
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
857 while (<IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
858 s/-//g unless /^>/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
859 print OUTA;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
860 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
861
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
862 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
863 close OUTA;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
864 #------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
865 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
866 #xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
867
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
868
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
869 #xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxxmulti_species_t2_2 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
870 sub multi_species_t2_2{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
871 #print "IN multi_species_t2_2 : @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
872 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
873 my $species = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
874 my $output1 = $input."2";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
875
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
876
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
877 open (FILEF1, "<$input");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
878 open (FILEF2, ">$output1");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
879
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
880 my $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
881
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
882 while($line1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
883 {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
884 # chomp($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
885 if ($line1 =~ (m/^\>$species/)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
886 chomp($line1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
887 print FILEF2 $line1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
888 $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
889 chomp($line1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
890 print FILEF2 "\t", $line1,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
891 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
892 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
893 $line1 = <FILEF1>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
894 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
895
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
896 close FILEF1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
897 close FILEF2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
898 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
899
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
900 #xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
901
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
902
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
903 #xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
904 sub sputnikoutput_corrector{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
905 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
906 my $output = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
907 open(IN,"<$input") or die "Cannot open file $input :$!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
908 open(OUT,">$output") or die "Cannot open file $output :$!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
909 my $tine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
910 while (my $line=<IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
911 if($line =~/length /){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
912 $tine = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
913 $tine =~ s/\s+/\t/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
914 my @fields = split(/\t/,$tine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
915 if ($fields[6] > 60){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
916 print OUT $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
917 $line = <IN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
918
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
919 while (($line !~ /nucleotide/) && ($line !~ /^>/)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
920 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
921 print OUT $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
922 $line = <IN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
923 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
924 print OUT "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
925 print OUT $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
926 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
927 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
928 print OUT $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
929 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
930 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
931 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
932 print OUT $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
933 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
934 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
935 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
936 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
937 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
938 #xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
939
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
940
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
941 #xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
942 sub multi_species_t4{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
943 # print "multi_species_t4 : @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
944 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
945 my $output = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
946 open (FILEA, "<$input");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
947 open (FILEB, ">$output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
948
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
949 my $line = <FILEA>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
950
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
951 while ($line) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
952 # chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
953 if ($line =~ />/) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
954 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
955 print FILEB $line, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
956 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
957
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
958
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
959 if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
960 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
961 print FILEB $line, " " ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
962 $line = <FILEA>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
963 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
964 print FILEB $line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
965 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
966
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
967 $line = <FILEA>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
968 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
969
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
970
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
971 close FILEA;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
972 close FILEB;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
973
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
974 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
975
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
976 #xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
977
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
978
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
979 #xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
980 sub multi_species_t5{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
981
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
982 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
983 my $output = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
984
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
985 open(FILEB,"<$input");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
986 open(FILEC,">$output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
987
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
988 my $curdef;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
989
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
990 while (my $line = <FILEB> ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
991
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
992 if ($line =~ /^>/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
993 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
994 $curdef = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
995 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
996 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
997
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
998 if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
999 print FILEC $curdef," ",$line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1000 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1001
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1002 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1003
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1004
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1005 close FILEB;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1006 close FILEC;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1007
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1008 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1009 #xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1010
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1011
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1012 #xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1013 sub multi_species_t6{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1014 my $input = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1015 my $output = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1016 my $focalstrand=$_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1017 # print "inpput = @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1018 open (FILE, "<$input");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1019 open (FILE_MICRO, ">$output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1020 my $linecounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1021 while (my $line = <FILE>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1022 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1023 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1024 #print "line = $line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1025 #MONO#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1026 $line =~ /$focalspec\s[a-zA-Z]+[0-9a-zA-Z]+\s[0-9]+\s[0-9]+\s([+\-])/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1027 my $strand=$1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1028 my $no_of_species = ($line =~ s/\s+[+\-]\s+/ /g);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1029 #print "line = $line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1030 my $specfieldsend = 2 + ($no_of_species*4) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1031 my @fields = split(/\s+/, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1032 my @speciesdata = @fields[0 ... $specfieldsend];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1033 $line =~ /([a-z]+nucleotide)\s([0-9]+)\s:\s([0-9]+)/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1034 my ($tide, $start, $end) = ($1, $2, $3);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1035 #print "no_of_species=$no_of_species.. speciesdata = @speciesdata and ($tide, $start, $end)\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1036 if($line =~ /mononucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1037 print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields], mono($fields[$#fields]),),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1038 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1039 #DI#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1040 elsif($line =~ /dinucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1041 print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields], di($fields[$#fields]),),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1042 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1043 #TRI#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1044 elsif($line =~ /trinucleotide/ ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1045 print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields], tri($fields[$#fields]),),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1046 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1047 #TETRA#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1048 elsif($line =~ /tetranucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1049 print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields], tetra($fields[$#fields]),),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1050 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1051 #PENTA#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1052 elsif($line =~ /pentanucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1053 #print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields], penta($fields[$#fields]),),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1054 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1055 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1056 # print "not: @fields\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1057 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1058 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1059 # print "linecounter=$linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1060 close FILE;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1061 close FILE_MICRO;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1062 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1063
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1064 sub mono {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1065 my $st = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1066 my $tp = unpack "A1"x(length($st)/1),$st;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1067 my $var1 = substr($tp, 0, 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1068 return join ("\t", $var1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1069 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1070 sub di {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1071 my $st = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1072 my $tp = unpack "A2"x(length($st)/2),$st;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1073 my $var1 = substr($tp, 0, 2);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1074 return join ("\t", $var1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1075 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1076 sub tri {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1077 my $st = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1078 my $tp = unpack "A3"x(length($st)/3),$st;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1079 my $var1 = substr($tp, 0, 3);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1080 return join ("\t", $var1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1081 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1082 sub tetra {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1083 my $st = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1084 my $tp = unpack "A4"x(length($st)/4),$st;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1085 my $var1 = substr($tp, 0, 4);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1086 return join ("\t", $var1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1087 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1088 sub penta {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1089 my $st = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1090 my $tp = unpack "A5"x(length($st)/5),$st;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1091 my $var1 = substr($tp, 0, 5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1092 return join ("\t", $var1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1093 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1094
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1095 #xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1096
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1097
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1098 #xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1099 sub t9{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1100 my $input1 = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1101 my $input2 = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1102 my $output = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1103
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1104
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1105 open(IN1,"<$input1") if -e $input1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1106 open(IN2,"<$input2") or die "cannot open file $_[1] : $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1107 open(OUT,">$output") or die "cannot open file $_[2] : $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1108
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1109
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1110 my %seen = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1111 my $prevkey = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1112
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1113 if (-e $input1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1114 while (my $line = <IN1>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1115 chomp($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1116 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1117 my $key1 = join ("_K10K1_",@fields[0,1,3,4,5]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1118 # print "key in t9 = $key1\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1119 $seen{$key1}++ if ($prevkey ne $key1) ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1120 $prevkey = $key1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1121 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1122 # print "done first hash\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1123 close IN1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1124 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1125
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1126 while (my $line = <IN2>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1127 # print $line, "**\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1128 if (-e $input1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1129 chomp($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1130 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1131 my $key2 = join ("_K10K1_",@fields[0,1,3,4,5]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1132 if (exists $seen{$key2}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1133 print OUT "$line\n" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1134 delete $seen{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1135 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1136 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1137 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1138 print OUT "$line\n" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1139 # print "$line\n" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1140 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1141 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1142
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1143 close IN2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1144 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1145 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1146 #xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1147
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1148
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1149 #xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1150
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1151
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1152 sub multiSpecies_compound_microsat_hunter3{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1153
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1154 my $input1 = $_[0]; ###### the *_sput_op4_ii file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1155 my $input2 = $_[1]; ###### looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1156 my $output1 = $_[2]; ###### plain microsatellite file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1157 my $output2 = $_[3]; ###### compound microsatellite file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1158 my $org = $_[4]; ###### 1 or 2
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1159 $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1160 #print "IN multiSpecies_compound_microsat_hunter3: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1161 #my @tags = split(/\t/,$info);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1162 sub compoundify;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1163 open(IN,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1164 open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1165 open(OUT,">$output1") or die "Cannot open file $output1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1166 open(OUT2,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1167 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1168 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1169 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1170 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1171 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1172 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1173 my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1174
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1175 my @thresholds = ("0");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1176 push(@thresholds, split(/_/,$_[6]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1177 sub thresholdCheck;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1178 my %micros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1179 while (my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1180 # print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1181 next if $line =~ /\t\t/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1182 if ($line =~ /^>[A-Za-z0-9_]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1183 my $key = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1184 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1185 push (@{$micros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1186 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1187 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1188 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1189 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1190 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1191 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1192
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1193 my $linecount = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1194
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1195 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1196 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1197 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1198
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1199 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1200
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1201 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1202
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1203 if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1204 $key = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1205 # print $key, "<-<-<-<-<-<-<-<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1206 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1207 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1208 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1209
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1210 if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1211 $linecount++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1212 my @microstring = @{$micros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1213 my @tempmicrostring = @{$micros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1214
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1215 foreach my $line (@tempmicrostring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1216 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1217 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1218 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1219 push (@{$microstart{$start}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1220 push (@{$microend{$end}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1221 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1222 my $firstflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1223 while( my $line =shift(@microstring)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1224 # print "-----------\nline = $line ";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1225 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1226 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1227 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1228 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1229 my $startmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1230 my $endmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1231
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1232 # print "fields=@fields, start = $start end=$end, startcord=$startcord, endcord=$endcord\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1233
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1234 delete ($microstart{$start});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1235 delete ($microend{$end});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1236 my $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1237 my $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1238 my $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1239 my $prestart = $start - $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1240 my $postend = $end + $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1241 my @compoundlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1242 my %compoundhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1243 push (@compoundlines, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1244 push (@{$compoundhash{$line}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1245 my $startrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1246 my $endrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1247
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1248 while( ($startflag eq "down") || ($endflag eq "down") ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1249 if ((($prestart < 0) && $firstflag eq "up") || (($postend > length($sields[$sequencepos])) && $firstflag eq "up") ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1250 # print "coming to the end of sequence,prestart = $prestart & post end = $postend and sequence length =", length($sields[$sequencepos])," so exiting\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1251 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1252 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1253
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1254 $firstflag = "up";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1255 if ($startflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1256 for my $i ($prestart ... $start){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1257
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1258 if(exists $microend{$i}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1259 chomp $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1260 if(exists $compoundhash{$microend{$i}[0]}) {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1261 # print "sending from microend $startmicro, $microend{$i}[0] |||\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1262 if (identityMatch_thresholdCheck($startmicro, $microend{$i}[0], $startrank) eq "proceed"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1263 push(@compoundlines, $microend{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1264 # print "accepted\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1265 my @tields = split(/\t/,$microend{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1266 $startmicro = $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1267 chomp $startmicro;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1268 $start = $tields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1269 $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1270 $startrank++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1271 # print "startcompund = $microend{$i}[0]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1272 delete $microend{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1273 delete $microstart{$start};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1274 $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1275 $prestart = $start - $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1276 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1277 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1278 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1279 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1280 $startflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1281 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1282 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1283 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1284 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1285 $startflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1286 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1287 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1288 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1289
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1290 $endrank = $startrank;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1291
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1292 if ($endflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1293 for my $i ($end ... $postend){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1294
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1295 if(exists $microstart{$i} ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1296 chomp $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1297 if(exists $compoundhash{$microstart{$i}[0]}) {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1298 # print "sending from microstart $endmicro, $microstart{$i}[0] |||\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1299
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1300 if(identityMatch_thresholdCheck($endmicro,$microstart{$i}[0], $endrank) eq "proceed"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1301 push(@compoundlines, $microstart{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1302 # print "accepted\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1303 my @tields = split(/\t/,$microstart{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1304 $end = $tields[$endcord]-0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1305 $endmicro = $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1306 $endrank++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1307 chomp $endmicro;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1308 $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1309 # print "endcompund = $microstart{$i}[0]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1310 delete $microstart{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1311 delete $microend{$end};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1312 shift @microstring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1313 $postend = $end + $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1314 $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1315 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1316 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1317 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1318 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1319 $endflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1320 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1321 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1322 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1323 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1324 $endflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1325 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1326 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1327 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1328 # print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1329 } #end while( $flag eq "down")
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1330 # print "compoundlines = @compoundlines \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1331 if (scalar (@compoundlines) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1332 print OUT $line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1333 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1334 if (scalar (@compoundlines) > 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1335 my $compoundline = compoundify(\@compoundlines, $sields[$sequencepos]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1336 # print $compoundline,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1337 print OUT2 $compoundline,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1338 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1339 } #end foreach my $line (@microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1340 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1341
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1342
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1343 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1344
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1345 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1346 close OUT2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1347 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1348
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1349
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1350 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1351 sub compoundify{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1352 my ($compoundlines, $sequence) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1353 # print "\nfound to compound : @$compoundlines and$sequence \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1354 my $noOfComps = @$compoundlines;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1355 # print "Number of elements in hash is $noOfComps\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1356 my @starts;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1357 my @ends;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1358 foreach my $line (@$compoundlines){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1359 # print "compoundify.. line = $line \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1360 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1361 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1362 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1363 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1364 # print "start = $start, end = $end \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1365 push(@starts, $start);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1366 push(@ends,$end);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1367 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1368 my @temp = @$compoundlines;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1369 my $startline=$temp[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1370 my @mields = split(/\t/,$startline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1371 my $startcoord = $mields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1372 my $startgapsign=$mields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1373 my @startsorted = sort { $a <=> $b } @starts;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1374 my @endsorted = sort { $a <=> $b } @ends;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1375 my @intervals;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1376 for my $end (0 ... (scalar(@endsorted)-2)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1377 my $interval = substr($sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1378 push(@intervals,$interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1379 # print "interval = $interval =\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1380 # print "substr(sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1))\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1381 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1382 push(@intervals,"");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1383 my $compoundmicrosat=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1384 my $multiunit="";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1385 foreach my $line (@$compoundlines){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1386 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1387 my $component="[".$fields[$microsatcord]."]".shift(@intervals);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1388 $compoundmicrosat=$compoundmicrosat.$component;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1389 $multiunit=$multiunit."[".$fields[$motifcord]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1390 # print "multiunit = $multiunit\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1391 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1392 my $compoundcopy = $compoundmicrosat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1393 $compoundcopy =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1394 my $compoundlength = $mields[$startcord] + length($compoundcopy) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1395
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1396
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1397 my $compoundline = join("\t",(@mields[0 ... $infocord], "compound",@mields[$startcord ... $startcord+1],$compoundlength,$compoundmicrosat, $multiunit));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1398 return $compoundline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1399 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1400
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1401 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1402
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1403 sub identityMatch_thresholdCheck{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1404 my $line1 = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1405 my $line2 = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1406 my $rank = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1407 my @lields1 = split(/\t/,$line1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1408 my @lields2 = split(/\t/,$line2);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1409 # print "recieved $line1 && $line2\n motif comparison: ", length($lields1[$motifcord])," : ",length($lields2[$motifcord]),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1410
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1411 if (length($lields1[$motifcord]) == length($lields2[$motifcord])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1412 my $probe = $lields1[$motifcord].$lields1[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1413 #print "$probe :: $lields2[$motifcord]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1414 return "proceed" if $probe =~ /$lields2[$motifcord]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1415 #print "line recieved\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1416 if ($rank ==1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1417 return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1418 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1419 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1420 return "proceed" if thresholdCheck($line2) eq "proceed";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1421 return "stop";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1422 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1423 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1424 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1425 if ($rank ==1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1426 return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1427 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1428 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1429 return "proceed" if thresholdCheck($line2) eq "proceed";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1430 return "stop";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1431 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1432 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1433 return "stop";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1434 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1435 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1436
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1437 sub thresholdCheck{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1438 my @checkthresholds=(0,@thresholds);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1439 #print "IN thresholdCheck: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1440 my $line = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1441 my @lields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1442 return "proceed" if length($lields[$microsatcord]) >= $checkthresholds[length($lields[$motifcord])];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1443 return "stop";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1444 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1445 #xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1446
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1447
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1448 #xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1449
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1450 sub multiSpecies_filtering_interrupted_microsats{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1451 # print "IN multiSpecies_filtering_interrupted_microsats: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1452 my $unfiltered = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1453 my $filtered = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1454 my $residue = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1455 my $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1456 open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1457 open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1458 open(RES,">$residue") or die "Cannot open file $residue: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1459
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1460 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1461 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1462 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1463 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1464 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1465 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1466
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1467
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1468 my @sub_thresholds = (0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1469
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1470 push(@sub_thresholds, split(/_/,$_[3]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1471 my @thresholds = (0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1472
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1473 push(@thresholds, split(/_/,$_[4]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1474
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1475 while (my $line = <UNF>) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1476 next if $line !~ /[a-z]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1477 #print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1478 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1479 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1480 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1481 my $realmotif = $motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1482 #print "motif = $motif\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1483 if ($motif =~ /^\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1484 $motif =~ s/^\[//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1485 my @motifs = split(/\]/,$motif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1486 $realmotif = $motifs[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1487 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1488 # print "realmotif = $realmotif";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1489 my $motif_size = length($realmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1490
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1491 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1492 # print "microsat = $microsat\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1493 $microsat =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1494 my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1495
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1496 $microsat = join("",@microsats);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1497 if (length($microsat) < $thresholds[$motif_size]) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1498 # print length($microsat)," < ",$thresholds[$motif_size],"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1499 print RES $line,"\n"; next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1500 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1501 my @lengths = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1502 foreach my $mic (@microsats){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1503 push(@lengths, length($mic));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1504 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1505 if (largest_microsat(@lengths) < $sub_thresholds[$motif_size]) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1506 # print largest_microsat(@lengths)," < ",$sub_thresholds[$motif_size],"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1507 print RES $line,"\n"; next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1508 else {print FIL $line,"\n"; next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1509 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1510 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1511 close FIL;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1512 close RES;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1513
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1514 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1515
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1516 sub largest_microsat{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1517 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1518 my($max) = shift(@_);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1519 foreach my $temp (@_) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1520 #print "finding largest array: $maxcounter \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1521 if($temp > $max){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1522 $max = $temp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1523 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1524 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1525 return($max);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1526 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1527
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1528 #xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1529
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1530
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1531 #xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1532 sub multiSpecies_compound_microsat_analyzer{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1533 ####### PARAMETER ########
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1534 ##########################
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1535
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1536 my $input1 = $_[0]; ###### the *_sput_op4_ii file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1537 my $input2 = $_[1]; ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1538 my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1539 my $output2 = $_[3]; ###### the pure compound microsatellites
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1540 my $org = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1541 my $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1542 # print "IN multiSpecies_compound_microsat_analyzer: $input1\n $input2\n $output1\n $output2\n $org\n $no_of_species\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1543 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1544 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1545 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1546 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1547 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1548 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1549 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1550
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1551 open(IN,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1552 open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1553
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1554 open(OUT,">$output1") or die "Cannot open file $output1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1555 open(OUT2,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1556
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1557
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1558 # print "opened files \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1559 my %micros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1560 my $keycounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1561 my $linecounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1562 while (my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1563 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1564 if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1565 my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1566 push (@{$micros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1567 $keycounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1568 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1569 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1570 # print "no key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1571 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1572 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1573 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1574 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1575 # print "done hash . linecounter=$linecounter, keycounter=$keycounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1576 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1577 # NOW READING THE SEQUENCE FILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1578 my $keyfound=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1579 my $keyexists=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1580 my $inter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1581 my $pure=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1582
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1583 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1584 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1585 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1586 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1587 my $key = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1588 if ($sine =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1589 $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1590 # print $sine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1591 # print $key;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1592 $keyfound++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1593 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1594 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1595
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1596 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1597 #<STDIN> if !defined $key;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1598
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1599 if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1600 $keyexists++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1601 my @microstring = @{$micros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1602
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1603 my @filteredmicrostring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1604
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1605 foreach my $line (@microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1606 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1607 my $copy_line = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1608 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1609 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1610 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1611 # FOR COMPOUND MICROSATELLITES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1612 if ($fields[$typecord] eq "compound"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1613 $line = compound_microsat_analyser($line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1614 if ($line eq "NULL") {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1615 print OUT2 "$copy_line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1616 $pure++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1617 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1618 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1619 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1620 print OUT "$line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1621 $inter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1622 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1623 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1624 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1625 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1626
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1627 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1628 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1629 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1630 close OUT2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1631 # print "keyfound=$keyfound, keyexists=$keyexists, pure=$pure, inter=$inter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1632 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1633
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1634 sub compound_microsat_analyser{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1635 my $line = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1636 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1637 my $motifline = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1638 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1639 $motifline =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1640 $microsat =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1641 $microsat =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1642 my @interruptions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1643 my @motields = split(/\]\[/,$motifline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1644 my @microields = split(/\][a-zA-Z|-]*\[/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1645 my @inields = split(/[.*]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1646 shift @inields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1647 my @motifcount = scalar(@motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1648 my $prevmotif = $motields[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1649 my $prevmicro = $microields[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1650 my $prevphase = substr($microields[0],-(length($motields[0])),length($motields[0]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1651 my $localflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1652 my @infoarray = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1653
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1654 for my $l (1 ... (scalar(@motields)-1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1655 my $probe = $prevmotif.$prevmotif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1656 if (length $prevmotif != length $motields[$l]) {$localflag = "up"; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1657
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1658 if ($probe =~ /$motields[$l]/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1659 my $curr_endphase = substr($microields[$l],-length($motields[$l]),length($motields[$l]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1660 my $curr_startphase = substr($microields[$l],0,length($motields[$l]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1661 if ($curr_startphase =~ /$prevphase/i) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1662 $infoarray[$l-1] = "insertion";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1663 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1664 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1665 $infoarray[$l-1] = "indel/substitution";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1666 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1667
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1668 $prevmotif = $motields[$l]; $prevmicro = $microields[$l]; $prevphase = $curr_endphase;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1669 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1670 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1671 else {$localflag = "up"; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1672 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1673 if ($localflag eq 'up') {return "NULL";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1674
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1675 if (length($prevmotif) == 1) {$fields[$typecord] = "mononucleotide";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1676 if (length($prevmotif) == 2) {$fields[$typecord] = "dinucleotide";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1677 if (length($prevmotif) == 3) {$fields[$typecord] = "trinucleotide";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1678 if (length($prevmotif) == 4) {$fields[$typecord] = "tetranucleotide";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1679 if (length($prevmotif) == 5) {$fields[$typecord] = "pentanucleotide";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1680
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1681 @microields = split(/[\[|\]]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1682 my @microsats = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1683 my @positions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1684 my $lengthtracker = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1685
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1686 for my $i (0 ... (scalar(@microields ) - 1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1687 if ($i%2 == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1688 push(@microsats,$microields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1689 $lengthtracker = $lengthtracker + length($microields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1690
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1691 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1692 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1693 push(@interruptions,$microields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1694 push(@positions, $lengthtracker+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1695 $lengthtracker = $lengthtracker + length($microields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1696 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1697
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1698 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1699 my $returnline = join("\t",(join("\t",@fields),join(",",(@infoarray)),join(",",(@interruptions)),join(",",(@positions)),scalar(@interruptions)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1700 return($returnline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1701 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1702
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1703 #xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1704
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1705
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1706 #xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1707
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1708 sub multiSpecies_compoundClarifyer{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1709 # print "IN multiSpecies_compoundClarifyer: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1710 my $input1 = $_[0]; ###### the *_sput_compound
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1711 my $input2 = $_[1]; ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1712 my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1713 my $output2 = $_[3]; ###### compound file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1714 my $org = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1715 my $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1716 @thresholds = "0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1717 push(@thresholds, split(/_/,$_[6]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1718
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1719
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1720 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1721 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1722 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1723 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1724 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1725 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1726 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1727 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1728
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1729 $interr_poscord = $motifcord + 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1730 $no_of_interruptionscord = $motifcord + 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1731 $interrcord = $motifcord + 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1732 $interrtypecord = $motifcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1733
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1734
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1735 open(IN,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1736 open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1737
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1738 open(INT,">$output1") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1739 open(COMP,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1740 #open(CH,">changed") or die "Cannot open file changed $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1741
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1742 # print "opened files \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1743 my $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1744 my $microcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1745
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1746 my %micros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1747 while (my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1748 # print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1749 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1750 if ($line =~ /($focalspec)\s+([0-9a-zA-Z_\-]+)\s+([0-9]+)\s+([0-9]+)/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1751 my $key = join("\t",$1, $2, $3, $4);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1752 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1753 # print "key = $key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1754 push (@{$micros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1755 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1756 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1757 else {#print $line," key not made\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1758 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1759 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1760 # print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1761 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1762 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1763 # print "done hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1764 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1765 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1766 # NOW READING THE SEQUENCE FILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1767 my @microsat_types = qw(_ mononucleotide dinucleotide trinucleotide tetranucleotide);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1768 $printer = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1769
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1770 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1771 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1772 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1773 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1774 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1775
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1776 # print "sine = $sine. focalspec = $focalspec \n"; #<STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1777
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1778 if ($sine =~ /($focalspec)\s+([0-9a-zA-Z_\-]+)\s+([0-9]+)\s+([0-9]+)/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1779
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1780 # if ($sine =~ /([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1781 $key = join("\t",$1, $2, $3, $4);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1782 # print "key = $key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1783 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1784 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1785 # print "no key in $sine\nfor pattern ([a-z0-9A-Z]+) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) / \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1786 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1787
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1788 if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1789 my @microstring = @{$micros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1790 delete $micros{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1791
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1792 foreach my $line (@microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1793 # print "#---------#---------#---------#---------#---------#---------#---------#---------\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1794 # print "microsat = $line" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1795 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1796 my $copy_line = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1797 my @mields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1798 my @fields = @mields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1799 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1800 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1801 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1802 my $motifline = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1803 my $microsatcopy = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1804 my $positioner = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1805 $positioner =~ s/[a-zA-Z|-]/_/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1806 $microsatcopy =~ s/^\[|\]$//gs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1807 chomp $microsatcopy;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1808 my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1809 my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1810 my $absolutstart = 1; my $absolutend = $absolutstart + ($end-$start);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1811 # print "absolut: start = $absolutstart, end = $absolutend\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1812 shift @inields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1813 #print "inields =@inields<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1814 $motifline =~ s/^\[|\]$//gs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1815 chomp $motifline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1816 #print "microsat = $microsat, its copy = $microsatcopy motifline = $motifline<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1817 my @motields = split(/\]\[/,$motifline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1818 my $seq = $microsatcopy;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1819 $seq =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1820 my $seqlen = length($seq);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1821 $seq = " ".$seq;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1822
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1823 my $longestmotif_no = longest_array_element(@motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1824 my $shortestmotif_no = shortest_array_element(@motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1825 #print "shortest motif = $motields[$shortestmotif_no], longest motif = $motields[$longestmotif_no] \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1826
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1827 my $search = $motields[$longestmotif_no].$motields[$longestmotif_no];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1828 if ((length($motields[$longestmotif_no]) == length($motields[$shortestmotif_no])) && ($search !~ /$motields[$shortestmotif_no]/) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1829 print COMP $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1830 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1831 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1832
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1833 my @shortestmotif_nos = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1834 for my $m (0 ... $#motields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1835 push(@shortestmotif_nos, $m) if (length($motields[$m]) == length($motields[$shortestmotif_no]) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1836 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1837 ## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1838 my $newleft =();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1839 my $leftstopper = 0; my $rightstopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1840 foreach my $shortmotif_no (@shortestmotif_nos){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1841 next if $shortmotif_no == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1842 my $last_left = $shortmotif_no; #$#motields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1843 my $last_hitter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1844 for (my $i =($shortmotif_no-1); $i>=0; $i--){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1845 my $search = $motields[$shortmotif_no];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1846 if (length($motields[$shortmotif_no]) == 1){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1847 if( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1848 $last_hitter = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1849 $last_left = $i+1; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1850 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1851 my $probe = $motields[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1852 if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1853
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1854 if ($probe !~ /$search/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1855 $last_hitter = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1856 $last_left = $i+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1857 # print "hit the last match: before $microields[$i]..last left = $last_left.. exiting.\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1858 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1859 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1860 $last_left--;$last_hitter = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1861 # print "passed tests, last left = $last_left\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1862 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1863 # print "comparing whether $last_left < $shortmotif_no, lasthit = $last_hitter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1864 if (($last_left) < $shortmotif_no && $last_hitter == 1) {$leftstopper=0; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1865 else {$leftstopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1866 # print "leftstopper = 1\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1867 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1868 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1869
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1870 ## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1871 my $newright =();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1872 foreach my $shortmotif_no (@shortestmotif_nos){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1873 next if $shortmotif_no == $#motields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1874 my $last_right = $shortmotif_no;# -1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1875 for my $i ($shortmotif_no+1 ... $#motields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1876 my $search = $motields[$shortmotif_no];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1877 if (length($motields[$shortmotif_no]) == 1 ){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1878 if ( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1879 $last_right = $i-1; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1880 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1881 my $probe = $motields[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1882 if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1883 if ( $probe !~ /$search/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1884 $last_right = $i-1; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1885 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1886 $last_right++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1887 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1888 if (($last_right) > $shortmotif_no) {$rightstopper=0; last;# print "rightstopper = 0\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1889 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1890 else {$rightstopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1891 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1892 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1893
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1894
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1895 if ($rightstopper == 1 && $leftstopper == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1896 print COMP $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1897 # print "rightstopper == 1 && leftstopper == 1\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1898 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1899 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1900
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1901 # print "pased initial testing phase \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1902 my @outputs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1903 my @orig_starts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1904 my @orig_ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1905 for my $mic (0 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1906 my $miclen = length($microields[$mic]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1907 my $microleftlen = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1908 #print "\nmic = $mic\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1909 if($mic > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1910 for my $submin (0 ... $mic-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1911 my $interval = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1912 if (!exists $inields[$submin]) {$interval = "";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1913 else {$interval = $inields[$submin];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1914 #print "inield =$interval< and microield =$microields[$submin]<\n ";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1915 $microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1916 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1917 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1918 push(@orig_starts,($microleftlen+1));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1919 push(@orig_ends, ($microleftlen+1 + $miclen -1));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1920 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1921
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1922 ############# F I N A L L Y S T U D Y I N G S E Q U E N C E S #########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1923
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1924
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1925 for my $mic (0 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1926 my $miclen = length($microields[$mic]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1927 my $microleftlen = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1928 if($mic > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1929 for my $submin (0 ... $mic-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1930 # if(!exists $inields[$submin]) {$inields[$submin] = "";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1931 my $interval = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1932 if (!exists $inields[$submin]) {$interval = "";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1933 else {$interval = $inields[$submin];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1934 #print "inield =$interval< and microield =$microields[$submin]<\n ";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1935 $microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1936 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1937 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1938 $fields[$startcord] = $microleftlen+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1939 $fields[$endcord] = $fields[$startcord] + $miclen -1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1940 $fields[$typecord] = $microsat_types[length($motields[$mic])];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1941 $fields[$microsatcord] = $microields[$mic];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1942 $fields[$motifcord] = $motields[$mic];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1943 my $templine = join("\t", (@fields[0 .. $motifcord]) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1944 my $orig_templine = join("\t", (@fields[0 .. $motifcord]) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1945 my $newline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1946 my $lefter = 1; my $righter = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1947 if ( $fields[$startcord] < 2){$lefter = 0;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1948 if ($fields[$endcord] == $seqlen){$righter = 0;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1949
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1950 while($lefter == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1951 $newline = left_extender($templine, $seq,$org);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1952 # print "returned line from left extender= $newline \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1953 if ($newline eq $templine){$templine = $newline; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1954 else {$templine = $newline;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1955
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1956 if (left_extention_permission_giver($templine) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1957 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1958 while($righter == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1959 $newline = right_extender($templine, $seq,$org);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1960 # print "returned line from right extender= $newline \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1961 if ($newline eq $templine){$templine = $newline; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1962 else {$templine = $newline;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1963 if (right_extention_permission_giver($templine) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1964 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1965 my @tempfields = split(/\t/,$templine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1966 $tempfields[$microsatcord] =~ s/\]|\[//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1967 $tempfields[$motifcord] =~ s/^\[|\]$//gs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1968 my @tempmotields = split(/\]\[/,$tempfields[$motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1969
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1970 if (scalar(@tempmotields) == 1 && $templine eq $orig_templine) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1971 # print "scalar ( tempmotields) = 1\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1972 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1973 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1974 my $prevmotif = shift(@tempmotields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1975 my $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1976
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1977 foreach my $tempmot (@tempmotields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1978 if (length($tempmot) != length($prevmotif)) {$stopper = 1; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1979 my $search = $prevmotif.$prevmotif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1980 if ($search !~ /$tempmot/) {$stopper = 1; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1981 $prevmotif = $tempmot;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1982 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1983 if ( $stopper == 1) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1984 # print "length tempmot != length prevmotif\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1985 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1986 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1987 my $lastend = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1988 #----------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1989 my $left_captured = (); my $right_captured = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1990 my $left_bp = (); my $right_bp = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1991 # print "new startcord = $tempfields[$startcord] , new endcord = $tempfields[$endcord].. orig strts = @orig_starts and orig ends = @orig_ends\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1992 for my $o (0 ... $#orig_starts){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1993 # print "we are talking abut tempstart:$tempfields[$startcord] >= origstart:$lastend && tempstart:$tempfields[$startcord] <= origend: $orig_ends[$o] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1994 # print "we are talking abut tempend:$tempfields[$endcord] >= origstart:$lastend && tempstart:$tempfields[$endcord] >= origend: $orig_ends[$o] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1995
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1996 if (($tempfields[$startcord] > $lastend) && ($tempfields[$startcord] <= $orig_ends[$o])){ # && ($tempfields[$startcord] != $fields[$startcord])
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1997 # print "motif captured on left is $microields[$o] from $microsat\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1998 $left_captured = $o;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
1999 $left_bp = $orig_ends[$o] - $tempfields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2000 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2001 elsif ($tempfields[$endcord] > $lastend && $tempfields[$endcord] <= $orig_ends[$o]){ #&& $tempfields[$endcord] != $fields[$endcord])
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2002 # print "motif captured on right is $microields[$o] from $microsat\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2003 $right_captured = $o;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2004 $right_bp = $tempfields[$endcord] - $orig_starts[$o] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2005 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2006 $lastend = $orig_ends[$o]
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2007 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2008 # print "leftcaptured = $left_captured, right = $right_captured\n" if $printer==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2009 my $leftmotif = (); my $left_trashed = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2010 if ($tempfields[$startcord] != $fields[$startcord]) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2011 $leftmotif = $motields[$left_captured];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2012 # print "$left_captured in @microields: $motields[$left_captured]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2013 if ( $left_captured !~ /[0-9]+/) {#print $line,"\n", $templine,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2014 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2015 $left_trashed = length($microields[$left_captured]) - $left_bp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2016 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2017 my $rightmotif = (); my $right_trashed = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2018 if ($tempfields[$endcord] != $fields[$endcord]) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2019 # print "$right_captured in @microields: $motields[$right_captured]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2020 $rightmotif = $motields[$right_captured];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2021 $right_trashed = length($microields[$right_captured]) - $right_bp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2022 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2023
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2024 ########## P A R A M S #####################@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2025 $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2026 my $deletioner = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2027 #if($tempfields[$startcord] != $fields[$startcord]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2028 # print "enter left: tempfields,startcord : $tempfields[$startcord] != $absolutstart && left_captured: $left_captured != 0 \n" if $printer==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2029 if ($left_captured != 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2030 # print "at line 370, going: 0 ... $left_captured-1 \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2031 for my $e (0 ... $left_captured-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2032 if( length($motields[$e]) > 2 && length($microields[$e]) > (3* length($motields[$e]) )){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2033 # print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2034 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2035 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2036 if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2037 # print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2038 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2039 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2040 if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2041 # print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2042 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2043 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2044 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2045 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2046 #}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2047 # print "after left search, deletioner = $deletioner\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2048 if ($deletioner >= 1) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2049 # print "deletioner = $deletioner\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2050 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2051 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2052
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2053 $deletioner = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2054
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2055 #if($tempfields[$endcord] != $fields[$endcord]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2056 # print "if tempfields endcord: $tempfields[$endcord] != absolutend: $absolutend\n and $right_captured != $#microields\n" if $printer==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2057 if ($right_captured != $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2058 # print "at line 394, going: $right_captured+1 ... $#microields \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2059 for my $e ($right_captured+1 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2060 if( length($motields[$e]) > 2 && length($microields[$e]) > (3* length($motields[$e])) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2061 # print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2062 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2063 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2064 if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2065 # print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2066 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2067 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2068 if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2069 # print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2070 $deletioner++; last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2071 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2072 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2073 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2074 #}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2075 # print "deletioner = $deletioner\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2076 if ($deletioner >= 1) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2077 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2078 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2079 my $leftMotifs_notCaptured = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2080 my $rightMotifs_notCaptured = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2081
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2082 if ($tempfields[$startcord] != $fields[$startcord] ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2083 #print "in left params: (length($leftmotif) == 1 && $tempfields[$startcord] != $fields[$startcord]) ... and .... $left_trashed > (1.5* length($leftmotif]) && ($tempfields[$startcord] != $fields[$startcord])\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2084 if (length($leftmotif) == 1 && $left_trashed > 3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2085 # print "invaded left motif is long mononucleotide" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2086 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2087
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2088 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2089 elsif ((length($leftmotif) != 1 && $left_trashed > ( thrashallow($leftmotif)) && ($tempfields[$startcord] != $fields[$startcord]) ) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2090 # print "invaded left motif too long" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2091 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2092 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2093 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2094 if ($tempfields[$endcord] != $fields[$endcord] ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2095 #print "in right params: after $tempfields[$endcord] != $fields[$endcord] ..... (length($rightmotif)==1 && $tempfields[$endcord] != $fields[$endcord]) ... and ... $right_trashed > (1.5* length($rightmotif))\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2096 if (length($rightmotif)==1 && $right_trashed){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2097 # print "invaded right motif is long mononucleotide" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2098 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2099
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2100 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2101 elsif (length($rightmotif) !=1 && ($right_trashed > ( thrashallow($rightmotif)) && $tempfields[$endcord] != $fields[$endcord])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2102 # print "invaded right motif too long" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2103 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2104
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2105 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2106 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2107 push @outputs, $templine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2108 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2109 if (scalar(@outputs) == 0){ print COMP $line; next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2110 # print "outputs are:", join("\n",@outputs),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2111 if (scalar(@outputs) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2112 my @oields = split(/\t/,$outputs[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2113 my $start = $oields[$startcord]+$mields[$startcord]-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2114 my $end = $start+($oields[$endcord]-$oields[$startcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2115 $oields[$startcord] = $start; $oields[$endcord] = $end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2116 print INT join("\t",@oields), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2117 # print CH $line,;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2118 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2119 if (scalar(@outputs) > 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2120 my $motif_min = 10;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2121 my $chosen_one = $outputs[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2122 foreach my $micro (@outputs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2123 my @oields = split(/\t/,$micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2124 my $tempmotif = $oields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2125 $tempmotif =~ s/^\[|\]$//gs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2126 my @omots = split(/\]\[/, $tempmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2127 # print "motif_min = $motif_min, current motif = $tempmotif\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2128 my $start = $oields[$startcord]+$mields[$startcord]-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2129 my $end = $start+($oields[$endcord]-$oields[$startcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2130 $oields[$startcord] = $start; $oields[$endcord] = $end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2131 if(length($omots[0]) < $motif_min) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2132 $chosen_one = join("\t",@oields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2133 $motif_min = length($omots[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2134 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2135 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2136 print INT $chosen_one, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2137 # print "chosen one is ".$chosen_one, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2138 # print CH $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2139
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2140
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2141 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2142
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2143 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2144
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2145 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2146 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2147 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2148 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2149 close INT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2150 close COMP;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2151 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2152 sub left_extender{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2153 #print "left extender\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2154 my ($line, $seq, $org) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2155 # print "in left extender... line passed = $line and sequence is $seq\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2156 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2157 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2158 my $rstart = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2159 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2160 $microsat =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2161 my $rend = $rstart + length($microsat)-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2162 $microsat =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2163 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2164 my $firstmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2165
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2166 if ($motif =~ /^\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2167 $motif =~ s/^\[//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2168 $motif =~ /([a-zA-Z]+)\].*/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2169 $firstmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2170 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2171 else {$firstmotif = $motif;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2172
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2173 #print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2174 my $leftphase = substr($microsat, 0,length($firstmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2175 my $phaser = $leftphase.$leftphase;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2176 my @phase = split(/\s*/,$leftphase);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2177 my @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2178 my @copy_phases = @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2179 my $crawler=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2180 for (0 ... (length($leftphase)-1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2181 push(@phases, substr($phaser, $crawler, length($leftphase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2182 $crawler++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2183 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2184
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2185 my $start = $rstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2186 my $end = $rend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2187
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2188 my $leftseq = substr($seq, 0, $start);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2189 # print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2190 my @extentions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2191 my @trappeds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2192 my @intervalposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2193 my @trappedposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2194 my @trappedphases = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2195 my @intervals = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2196 my $firstmotif_length = length($firstmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2197 foreach my $phase (@phases){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2198 # print "left phase\t",substr($leftseq, -10),"\t$phase\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2199 # print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2200 if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2201 # print "in left pattern\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2202 my $trapped = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2203 my $trappedpos = length($leftseq)-length($trapped);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2204 my $interval = $3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2205 my $intervalpos = index($trapped, $interval) + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2206 # print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2207
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2208 my $extention = substr($trapped, 0, length($trapped)-length($interval));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2209 my $leftpeep = substr($seq, 0, ($start-length($trapped)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2210 my @passed_overhangs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2211
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2212 for my $i (1 ... length($phase)-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2213 my $overhang = substr($phase, -length($phase)+$i);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2214 # print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2215 #TEMPORARY... BETTER METHOD NEEDED
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2216 $leftpeep =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2217 if ($leftpeep =~ /$overhang$/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2218 push(@passed_overhangs,$overhang);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2219 # print "l overhang\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2220 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2221 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2222
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2223 if(scalar(@passed_overhangs)>0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2224 my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2225 $extention = $overhang.$extention;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2226 $trapped = $overhang.$trapped;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2227 #print "trapped extended to $trapped \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2228 $trappedpos = length($leftseq)-length($trapped);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2229 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2230
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2231 push(@extentions,$extention);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2232 # print "extentions = @extentions \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2233
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2234 push(@trappeds,$trapped );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2235 push(@intervalposs,length($extention)+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2236 push(@trappedposs, $trappedpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2237 # print "trappeds = @trappeds\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2238 push(@trappedphases, substr($extention,0,length($phase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2239 push(@intervals, $interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2240 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2241 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2242 if (scalar(@trappeds == 0)) {return $line;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2243
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2244 my $nikaal = shortest_array_element(@intervals);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2245
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2246 if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2247 $fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2248 ##print "new fields 9 = $fields[9]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2249 $fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2250
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2251 if($fields[$microsatcord] !~ /^\[/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2252 $fields[$microsatcord] = "[".$fields[$microsatcord]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2253 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2254
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2255 $fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2256
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2257 if (exists ($fields[$motifcord+1])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2258 $fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2259 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2260 else{$fields[$motifcord+1] = "indel/deletion";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2261 ##print "new fields 14 = $fields[14]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2262
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2263 if (exists ($fields[$motifcord+2])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2264 $fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2265 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2266 else{$fields[$motifcord+2] = $intervals[$nikaal];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2267 my @seventeen=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2268 if (exists ($fields[$motifcord+3])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2269 @seventeen = split(/,/,$fields[$motifcord+3]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2270 # #print "scalarseventeen =@seventeen<-\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2271 for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2272 $fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2273 $fields[$motifcord+4] = $fields[$motifcord+4]+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2274 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2275
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2276 else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2277
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2278 ##print "new fields 16 = $fields[16]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2279 ##print "new fields 17 = $fields[17]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2280
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2281
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2282 my $returnline = join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2283 my $pastline = $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2284 if ($fields[$microsatcord] =~ /\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2285 $returnline = multiSpecies_compoundClarifyer_merge($returnline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2286 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2287 return $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2288 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2289 sub right_extender{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2290 my ($line, $seq, $org) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2291 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2292 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2293 my $rstart = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2294 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2295 $microsat =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2296 my $rend = $rstart + length($microsat)-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2297 $microsat =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2298 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2299 my $temp_lastmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2300
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2301 if ($motif =~ /\]$/s){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2302 $motif =~ s/\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2303 $motif =~ /.*\[([a-zA-Z]+)/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2304 $temp_lastmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2305 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2306 else {$temp_lastmotif = $motif;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2307 my $lastmotif = substr($microsat,-length($temp_lastmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2308 ##print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2309 my $rightphase = substr($microsat, -length($lastmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2310 my $phaser = $rightphase.$rightphase;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2311 my @phase = split(/\s*/,$rightphase);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2312 my @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2313 my @copy_phases = @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2314 my $crawler=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2315 for (0 ... (length($rightphase)-1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2316 push(@phases, substr($phaser, $crawler, length($rightphase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2317 $crawler++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2318 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2319
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2320 my $start = $rstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2321 my $end = $rend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2322
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2323 my $rightseq = substr($seq, $end+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2324 my @extentions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2325 my @trappeds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2326 my @intervalposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2327 my @trappedposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2328 my @trappedphases = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2329 my @intervals = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2330 my $lastmotif_length = length($lastmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2331 foreach my $phase (@phases){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2332 if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2333 my $trapped = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2334 my $trappedpos = $end+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2335 my $interval = $2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2336 my $intervalpos = index($trapped, $interval) + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2337
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2338 my $extention = substr($trapped, length($interval));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2339 my $rightpeep = substr($seq, ($end+length($trapped))+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2340 my @passed_overhangs = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2341
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2342 #TEMPORARY... BETTER METHOD NEEDED
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2343 $rightpeep =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2344
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2345 for my $i (1 ... length($phase)-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2346 my $overhang = substr($phase,0, $i);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2347 # #print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2348 if ($rightpeep =~ /^$overhang/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2349 push(@passed_overhangs, $overhang);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2350 # #print "r overhang\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2351 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2352 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2353 if (scalar(@passed_overhangs) > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2354 my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2355 $extention = $extention.$overhang;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2356 $trapped = $trapped.$overhang;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2357 # #print "trapped extended to $trapped \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2358 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2359
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2360 push(@extentions,$extention);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2361 ##print "extentions = @extentions \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2362
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2363 push(@trappeds,$trapped );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2364 push(@intervalposs,$intervalpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2365 push(@trappedposs, $trappedpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2366 # #print "trappeds = @trappeds\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2367 push(@trappedphases, substr($extention,0,length($phase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2368 push(@intervals, $interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2369 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2370 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2371 if (scalar(@trappeds == 0)) {return $line;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2372
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2373 # my $nikaal = longest_array_element(@trappeds);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2374 my $nikaal = shortest_array_element(@intervals);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2375
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2376 # #print "longest element found = $nikaal \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2377
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2378 if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2379 $fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2380 ##print "new fields 9 = $fields[9]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2381 $fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2382
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2383 ##print "new fields 11 = $fields[11]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2384
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2385 if($fields[$microsatcord] !~ /^\[/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2386 $fields[$microsatcord] = "[".$fields[$microsatcord]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2387 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2388
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2389 $fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2390 ##print "new fields 12 = $fields[12]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2391
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2392 ##print "scalar of fields = ",scalar(@fields),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2393 if (exists ($fields[$motifcord+1])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2394 # print " print fields = @fields.. scalar=", scalar(@fields),".. motifcord+1 = $motifcord + 1 \n " if !exists $fields[$motifcord+1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2395 # <STDIN> if !exists $fields[$motifcord+1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2396 $fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2397 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2398 else{$fields[$motifcord+1] = "indel/deletion";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2399 ##print "new fields 14 = $fields[14]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2400
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2401 if (exists ($fields[$motifcord+2])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2402 $fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2403 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2404 else{$fields[$motifcord+2] = $intervals[$nikaal];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2405 ##print "new fields 15 = $fields[15]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2406
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2407 my @seventeen=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2408 if (exists ($fields[$motifcord+3])){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2409 ##print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2410 # print " print fields = @fields\n " if !exists $fields[$motifcord+3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2411 <STDIN> if !exists $fields[$motifcord+3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2412 my $currpos = length($microsat)+$intervalposs[$nikaal];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2413 $fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2414 $fields[$motifcord+4] = $fields[$motifcord+4]+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2415
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2416 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2417
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2418 else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2419
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2420 ##print "new fields 16 = $fields[16]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2421
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2422 ##print "new fields 17 = $fields[17]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2423 my $returnline = join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2424 my $pastline = $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2425 if ($fields[$microsatcord] =~ /\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2426 $returnline = multiSpecies_compoundClarifyer_merge($returnline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2427 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2428 #print "finally right-extended line = ",$returnline,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2429 return $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2430 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2431 sub longest_array_element{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2432 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2433 my($max) = shift(@_);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2434 my $maxcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2435 foreach my $temp (@_) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2436 $counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2437 #print "finding largest array: $maxcounter \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2438 if(length($temp) > length($max)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2439 $max = $temp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2440 $maxcounter = $counter;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2441 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2442 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2443 return($maxcounter);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2444 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2445 sub shortest_array_element{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2446 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2447 my($min) = shift(@_);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2448 my $mincounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2449 foreach my $temp (@_) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2450 $counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2451 #print "finding largest array: $mincounter \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2452 if(length($temp) < length($min)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2453 $min = $temp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2454 $mincounter = $counter;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2455 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2456 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2457 return($mincounter);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2458 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2459
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2460
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2461 sub left_extention_permission_giver{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2462 my @fields = split(/\t/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2463 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2464 $microsat =~ s/(^\[)|-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2465 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2466 my $firstmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2467 my $firststretch = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2468 my @stretches=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2469 if ($motif =~ /^\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2470 $motif =~ s/^\[//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2471 $motif =~ /([a-zA-Z]+)\].*/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2472 $firstmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2473 @stretches = split(/\]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2474 $firststretch = $stretches[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2475 ##print "firststretch = $firststretch\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2476 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2477 else {$firstmotif = $motif;$firststretch = $microsat;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2478
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2479 if (length($firststretch) < $thresholds[length($firstmotif)]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2480 return "no";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2481 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2482 else {return "yes";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2483
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2484 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2485 sub right_extention_permission_giver{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2486 my @fields = split(/\t/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2487 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2488 $microsat =~ s/-|(\]$)//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2489 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2490 my $temp_lastmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2491 my $laststretch = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2492 my @stretches=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2493
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2494
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2495 if ($motif =~ /\]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2496 $motif =~ s/\]$//gs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2497 $motif =~ /.*\[([a-zA-Z]+)$/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2498 $temp_lastmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2499 @stretches = split(/\[/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2500 $laststretch = pop(@stretches);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2501 ##print "last stretch = $laststretch\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2502 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2503 else {$temp_lastmotif = $motif; $laststretch = $microsat;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2504
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2505 if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2506 return "no";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2507 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2508 else { return "yes";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2509
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2510
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2511 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2512 sub multiSpecies_compoundClarifyer_merge{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2513 my $line = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2514 #print "sent for mering: $line \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2515 my @mields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2516 my @fields = @mields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2517 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2518 my $motifline = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2519 my $microsatcopy = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2520 $microsatcopy =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2521 my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2522 my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2523 shift @inields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2524 #print "inields =@inields<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2525 $motifline =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2526 my @motields = split(/\]\[/,$motifline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2527 my @firstmotifs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2528 my @lastmotifs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2529 for my $i (0 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2530 $firstmotifs[$i] = substr($microields[$i],0,length($motields[$i]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2531 $lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2532 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2533 #print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2534 my @mergelist = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2535 my @inter_poses = split(/,/,$fields[$interr_poscord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2536 my $no_of_interruptions = $fields[$no_of_interruptionscord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2537 my @interruptions = split(/,/,$fields[$interrcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2538 my @interrtypes = split(/,/,$fields[$interrtypecord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2539 my $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2540 for my $i (0 ... $#motields-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2541 #print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2542 if (($lastmotifs[$i] eq $firstmotifs[$i+1]) && !exists $inields[$i]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2543 $stopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2544 push(@mergelist, ($i)."_".($i+1));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2545 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2546 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2547
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2548 return $line if scalar(@mergelist) == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2549
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2550 foreach my $merging (@mergelist){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2551 my @sets = split(/_/, $merging);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2552 my @tempmicro = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2553 my @tempmot = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2554 for my $i (0 ... $sets[0]-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2555 push(@tempmicro, "[".$microields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2556 push(@tempmicro, $inields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2557 push(@tempmot, "[".$motields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2558 #print "adding pre-motifs number $i\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2559 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2560 my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2561 push (@tempmicro, $pusher);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2562 push(@tempmot, "[".$motields[$sets[0]]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2563 my $outcoming = -2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2564 for my $i ($sets[1]+1 ... $#microields-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2565 push(@tempmicro, "[".$microields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2566 push(@tempmicro, $inields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2567 push(@tempmot, "[".$motields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2568 #print "adding post-motifs number $i\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2569 $outcoming = $i;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2570 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2571 if ($outcoming != -2){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2572 #print "outcoming = $outcoming \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2573 push(@tempmicro, "[".$microields[$outcoming+1 ]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2574 push(@tempmot,"[". $motields[$outcoming+1]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2575 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2576 $fields[$microsatcord] = join("",@tempmicro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2577 $fields[$motifcord] = join("",@tempmot);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2578
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2579 splice(@interrtypes, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2580 $fields[$interrtypecord] = join(",",@interrtypes);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2581 splice(@interruptions, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2582 $fields[$interrcord] = join(",",@interruptions);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2583 splice(@inter_poses, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2584 $fields[$interr_poscord] = join(",",@inter_poses);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2585 $no_of_interruptions = $no_of_interruptions - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2586 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2587
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2588 if ($no_of_interruptions == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2589 $fields[$microsatcord] =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2590 $fields[$motifcord] =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2591 $line = join("\t", @fields[0 ... $motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2592 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2593 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2594 $line = join("\t", @fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2595 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2596 return $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2597 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2598
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2599 sub thrashallow{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2600 my $motif = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2601 return 4 if length($motif) == 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2602 return 6 if length($motif) == 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2603 return 8 if length($motif) == 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2604
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2605 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2606
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2607 #xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2608
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2609
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2610 #xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2611 sub multispecies_filtering_compound_microsats{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2612 my $unfiltered = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2613 my $filtered = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2614 my $residue = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2615 my $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2616 open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2617 open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2618 open(RES,">$residue") or die "Cannot open file $residue: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2619
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2620 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2621 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2622 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2623 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2624 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2625 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2626
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2627 my @sub_thresholds = ("0");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2628 push(@sub_thresholds, split(/_/,$_[3]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2629 my @thresholds = ("0");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2630 push(@thresholds, split(/_/,$_[4]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2631
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2632 while (my $line = <UNF>) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2633 if ($line !~ /compound/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2634 print FIL $line,"\n"; next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2635 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2636 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2637 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2638 my $motifline = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2639 $motifline =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2640 my @motifs = split(/\]\[/,$motifline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2641 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2642 $microsat =~ s/^\[|\]$|-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2643 my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2644
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2645 my $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2646 for my $i (0 ... $#motifs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2647 my @common = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2648 my $probe = $motifs[$i].$motifs[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2649 my $motif_size = length($motifs[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2650
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2651 for my $j (0 ... $#motifs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2652 next if length($motifs[$i]) != length($motifs[$j]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2653 push(@common, length($microsats[$j])) if $probe =~ /$motifs[$j]/i;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2654 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2655
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2656 if (largest_microsat(@common) < $sub_thresholds[$motif_size]) {$stopper = 1; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2657 else {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2658 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2659
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2660 if ($stopper == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2661 print RES $line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2662 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2663 else { print FIL $line,"\n"; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2664 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2665 close FIL;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2666 close RES;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2667 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2668
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2669 #xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2670
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2671
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2672 #xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2673
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2674 sub chromosome_unrand_breaker{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2675 # print "IN chromosome_unrand_breaker: @_\n ";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2676 my $input1 = $_[0]; ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2677 my $dir = $_[1]; ###### directory where subsets are put
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2678 my $output2 = $_[2]; ###### list of subset files
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2679 my $increment = $_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2680 my $info = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2681 my $chr = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2682 open(SEQ,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2683
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2684 open(OUT,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2685
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2686 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2687 # NOW READING THE SEQUENCE FILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2688
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2689 my $seed = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2690 my $subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2691 print OUT $subset,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2692 open(SUB,">$subset");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2693
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2694 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2695 $seed++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2696 print SUB $sine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2697
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2698 if ($seed%$increment == 0 ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2699 close SUB;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2700 $subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2701 open(SUB,">$subset");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2702 print SUB $sine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2703 print OUT $subset,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2704 # print $subset,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2705 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2706 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2707 close OUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2708 close SUB;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2709 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2710 #xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2711
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2712
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2713 #xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2714 sub multiSpecies_interruptedMicrosatHunter{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2715 # print "IN multiSpecies_interruptedMicrosatHunter: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2716 my $input1 = $_[0]; ###### the *_sput_op4_ii file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2717 my $input2 = $_[1]; ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2718 my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2719 my $output2 = $_[3]; ###### uninterrupted microsatellite file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2720 my $org = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2721 my $no_of_species = $_[5];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2722
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2723 my @thresholds = "0";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2724 push(@thresholds, split(/_/,$_[6]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2725
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2726 # print "thresholds = @thresholds \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2727 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2728 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2729 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2730 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2731 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2732 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2733 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2734 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2735
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2736 $interr_poscord = $motifcord + 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2737 $no_of_interruptionscord = $motifcord + 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2738 $interrcord = $motifcord + 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2739 $interrtypecord = $motifcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2740
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2741
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2742 $prinkter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2743 # print "prionkytet = $prinkter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2744
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2745 open(IN,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2746 open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2747
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2748 open(INT,">$output1") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2749 open(UNINT,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2750
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2751 # print "opened files !!\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2752 my $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2753 my $microcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2754
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2755 my %micros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2756 while (my $line = <IN>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2757 # print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2758 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2759 if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s+([0-9a-zA-Z_]+)\s([0-9]+)\s+([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2760 my $key = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2761 # print $key, "#-#-#-#-#-#-#-#\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2762 push (@{$micros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2763 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2764 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2765 else {#print $line if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2766 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2767 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2768 # print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2769 close IN;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2770 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2771 # print "done hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2772 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2773 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2774 # NOW READING THE SEQUENCE FILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2775 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2776 #print $linecounter,"\n" if $linecounter % 1000 == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2777 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2778 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2779 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2780 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2781 if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2782 $key = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2783 # print $key, "<-<-<-<-<-<-<-<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2784 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2785
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2786 # $prinkter = 1 if $sine =~ /^>H\t499\t/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2787
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2788 if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2789 my @microstring = @{$micros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2790 delete $micros{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2791 my @filteredmicrostring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2792 # print "sequence = $sields[$sequencepos]" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2793 foreach my $line (@microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2794 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2795 my $copy_line = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2796 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2797 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2798 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2799
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2800 # print $line if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2801 #LOOKING FOR LEFTWARD EXTENTION OF MICROSATELLITE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2802 my $newline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2803 while(1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2804 # print "\n before left sequence = $sields[$sequencepos]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2805 if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2806
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2807 $newline = multiSpecies_interruptedMicrosatHunter_left_extender($line, $sields[$sequencepos],$org);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2808 if ($newline eq $line){$line = $newline; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2809 else {$line = $newline;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2810
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2811 if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2812 # print "returned line from left extender= $line \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2813 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2814 while(1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2815 # print "sequence = $sields[$sequencepos]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2816 if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2817
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2818 $newline = multiSpecies_interruptedMicrosatHunter_right_extender($line, $sields[$sequencepos],$org);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2819 if ($newline eq $line){$line = $newline; last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2820 else {$line = $newline;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2821
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2822 if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2823 # print "returned line from right extender= $line \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2824 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2825 # print "\n>>>>>>>>>>>>>>>>\n In the end, the line is: \n$line\n<<<<<<<<<<<<<<<<\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2826
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2827 my @tempfields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2828 if ($tempfields[$microsatcord] =~ /\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2829 print INT $line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2830 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2831 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2832 print UNINT $line,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2833 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2834
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2835 if ($line =~ /NULL/){ next; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2836 push(@filteredmicrostring, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2837 push (@{$microstart{$start}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2838 push (@{$microend{$end}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2839 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2840
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2841 my $firstflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2842
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2843 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2844 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2845 close INT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2846 close UNINT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2847 # print "final number of lines = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2848 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2849
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2850 sub multiSpecies_interruptedMicrosatHunter_left_extender{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2851 my ($line, $seq, $org) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2852 # print "left extender, like passed = $line\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2853 # print "in left extender... line passed = $line and sequence is $seq\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2854 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2855 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2856 my $rstart = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2857 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2858 $microsat =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2859 my $rend = $rstart + length($microsat)-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2860 $microsat =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2861 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2862 my $firstmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2863
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2864 if ($motif =~ /^\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2865 $motif =~ s/^\[//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2866 $motif =~ /([a-zA-Z]+)\].*/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2867 $firstmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2868 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2869 else {$firstmotif = $motif;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2870
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2871 # print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2872 my $leftphase = substr($microsat, 0,length($firstmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2873 my $phaser = $leftphase.$leftphase;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2874 my @phase = split(/\s*/,$leftphase);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2875 my @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2876 my @copy_phases = @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2877 my $crawler=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2878 for (0 ... (length($leftphase)-1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2879 push(@phases, substr($phaser, $crawler, length($leftphase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2880 $crawler++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2881 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2882
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2883 my $start = $rstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2884 my $end = $rend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2885
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2886 my $leftseq = substr($seq, 0, $start);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2887 # print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2888 my @extentions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2889 my @trappeds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2890 my @intervalposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2891 my @trappedposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2892 my @trappedphases = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2893 my @intervals = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2894 my $firstmotif_length = length($firstmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2895 foreach my $phase (@phases){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2896 # print "left phase\t",substr($leftseq, -10),"\t$phase\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2897 # print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2898 if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2899 # print "in left pattern\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2900 my $trapped = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2901 my $trappedpos = length($leftseq)-length($trapped);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2902 my $interval = $3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2903 my $intervalpos = index($trapped, $interval) + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2904 # print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2905
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2906 my $extention = substr($trapped, 0, length($trapped)-length($interval));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2907 my $leftpeep = substr($seq, 0, ($start-length($trapped)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2908 my @passed_overhangs;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2909
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2910 for my $i (1 ... length($phase)-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2911 my $overhang = substr($phase, -length($phase)+$i);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2912 # print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2913 #TEMPORARY... BETTER METHOD NEEDED
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2914 $leftpeep =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2915 if ($leftpeep =~ /$overhang$/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2916 push(@passed_overhangs,$overhang);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2917 # print "l overhang\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2918 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2919 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2920
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2921 if(scalar(@passed_overhangs)>0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2922 my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2923 $extention = $overhang.$extention;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2924 $trapped = $overhang.$trapped;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2925 # print "trapped extended to $trapped \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2926 $trappedpos = length($leftseq)-length($trapped);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2927 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2928
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2929 push(@extentions,$extention);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2930 # print "extentions = @extentions \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2931
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2932 push(@trappeds,$trapped );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2933 push(@intervalposs,length($extention)+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2934 push(@trappedposs, $trappedpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2935 # print "trappeds = @trappeds\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2936 push(@trappedphases, substr($extention,0,length($phase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2937 push(@intervals, $interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2938 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2939 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2940 if (scalar(@trappeds == 0)) {return $line;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2941
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2942 ############################ my $nikaal = longest_array_element(@trappeds);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2943 my $nikaal = shortest_array_element(@intervals);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2944
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2945 # print "longest element found = $nikaal \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2946
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2947 if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2948 $fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2949 #print "new fields 9 = $fields[9]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2950 $fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2951
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2952 #print "new fields 9 = $fields[9]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2953
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2954 if($fields[$microsatcord] !~ /^\[/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2955 $fields[$microsatcord] = "[".$fields[$microsatcord]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2956 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2957
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2958 $fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2959 #print "new fields 14 = $fields[12]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2960
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2961 #print "scalar of fields = ",scalar(@fields),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2962
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2963
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2964 if (scalar(@fields) > $motifcord+1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2965 $fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2966 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2967 else{$fields[$motifcord+1] = "indel/deletion";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2968 #print "new fields 14 = $fields[14]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2969
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2970 if (scalar(@fields)>$motifcord+2){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2971 $fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2972 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2973 else{$fields[$motifcord+2] = $intervals[$nikaal];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2974 #print "new fields 15 = $fields[15]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2975
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2976 my @seventeen=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2977
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2978 if (scalar(@fields)>$motifcord+3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2979 @seventeen = split(/,/,$fields[$motifcord+3]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2980 # print "scalarseventeen =@seventeen<-\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2981 for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2982 $fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2983 $fields[$motifcord+4] = $fields[$motifcord+4]+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2984 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2985
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2986 else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2987
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2988 #print "new fields 16 = $fields[16]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2989 #print "new fields 17 = $fields[17]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2990
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2991 # return join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2992 my $returnline = join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2993 my $pastline = $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2994 if ($fields[$microsatcord] =~ /\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2995 $returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2996 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2997 # print "finally left-extended line = ",$returnline,"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2998 return $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
2999 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3000
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3001 sub multiSpecies_interruptedMicrosatHunter_right_extender{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3002 # print "right extender\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3003 my ($line, $seq, $org) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3004 # print "in right extender... line passed = $line\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3005 # print "line = $line, sequence = ",$seq, "\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3006 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3007 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3008 my $rstart = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3009 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3010 $microsat =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3011 my $rend = $rstart + length($microsat)-1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3012 $microsat =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3013 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3014 my $temp_lastmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3015
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3016 if ($motif =~ /\]$/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3017 $motif =~ s/\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3018 $motif =~ /.*\[([a-zA-Z]+)/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3019 $temp_lastmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3020 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3021 else {$temp_lastmotif = $motif;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3022 my $lastmotif = substr($microsat,-length($temp_lastmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3023 # print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3024 my $rightphase = substr($microsat, -length($lastmotif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3025 my $phaser = $rightphase.$rightphase;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3026 my @phase = split(/\s*/,$rightphase);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3027 my @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3028 my @copy_phases = @phases;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3029 my $crawler=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3030 for (0 ... (length($rightphase)-1)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3031 push(@phases, substr($phaser, $crawler, length($rightphase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3032 $crawler++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3033 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3034
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3035 my $start = $rstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3036 my $end = $rend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3037
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3038 my $rightseq = substr($seq, $end+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3039 # print "length of sequence = " ,length($seq), "the coordinate to start from = ", $end+1, "\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3040 # print "right phases are @phases , end = $end right sequence = ",substr($rightseq,0,10),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3041 my @extentions = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3042 my @trappeds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3043 my @intervalposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3044 my @trappedposs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3045 my @trappedphases = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3046 my @intervals = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3047 my $lastmotif_length = length($lastmotif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3048 foreach my $phase (@phases){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3049 # print "right phase\t$phase\t",substr($rightseq,0,10),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3050 # print "search patter = (([a-zA-Z|-]{0,$lastmotif_length})($phase)+) \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3051 if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3052 # print "in right pattern\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3053 my $trapped = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3054 my $trappedpos = $end+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3055 my $interval = $2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3056 my $intervalpos = index($trapped, $interval) + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3057 # print "trapped = $trapped, interval = $interval\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3058
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3059 my $extention = substr($trapped, length($interval));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3060 my $rightpeep = substr($seq, ($end+length($trapped))+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3061 my @passed_overhangs = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3062
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3063 #TEMPORARY... BETTER METHOD NEEDED
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3064 $rightpeep =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3065
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3066 for my $i (1 ... length($phase)-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3067 my $overhang = substr($phase,0, $i);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3068 # print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3069 if ($rightpeep =~ /^$overhang/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3070 push(@passed_overhangs, $overhang);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3071 # print "r overhang\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3072 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3073 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3074 if (scalar(@passed_overhangs) > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3075 my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3076 $extention = $extention.$overhang;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3077 $trapped = $trapped.$overhang;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3078 # print "trapped extended to $trapped \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3079 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3080
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3081 push(@extentions,$extention);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3082 #print "extentions = @extentions \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3083
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3084 push(@trappeds,$trapped );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3085 push(@intervalposs,$intervalpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3086 push(@trappedposs, $trappedpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3087 # print "trappeds = @trappeds\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3088 push(@trappedphases, substr($extention,0,length($phase)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3089 push(@intervals, $interval);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3090 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3091 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3092 if (scalar(@trappeds == 0)) {return $line;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3093
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3094 ################################### my $nikaal = longest_array_element(@trappeds);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3095 my $nikaal = shortest_array_element(@intervals);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3096
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3097 # print "longest element found = $nikaal \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3098
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3099 if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3100 $fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3101 $fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3102
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3103
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3104 if($fields[$microsatcord] !~ /^\[/i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3105 $fields[$microsatcord] = "[".$fields[$microsatcord]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3106 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3107
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3108 $fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3109
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3110
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3111 if (scalar(@fields) > $motifcord+1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3112 $fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3113 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3114 else{$fields[$motifcord+1] = "indel/deletion";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3115
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3116 if (scalar(@fields)>$motifcord+2){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3117 $fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3118 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3119 else{$fields[$motifcord+2] = $intervals[$nikaal];}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3120
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3121 my @seventeen=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3122 if (scalar(@fields)>$motifcord+3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3123 #print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3124 my $currpos = length($microsat)+$intervalposs[$nikaal];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3125 $fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3126 $fields[$motifcord+4] = $fields[$motifcord+4]+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3127
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3128 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3129
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3130 else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3131
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3132 # print "finally right-extended line = ",join("\t",@fields),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3133 # return join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3134
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3135 my $returnline = join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3136 my $pastline = $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3137 if ($fields[$microsatcord] =~ /\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3138 $returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3139 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3140 # print "finally right-extended line = ",$returnline,"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3141 return $returnline;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3142
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3143 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3144
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3145 sub multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3146 my @fields = split(/\t/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3147 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3148 $microsat =~ s/(^\[)|-//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3149 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3150 chomp $motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3151 # print $motif, "\n" if $motif !~ /^\[/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3152 my $firstmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3153 my $firststretch = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3154 my @stretches=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3155
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3156 # print "motif = $motif, microsat = $microsat\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3157 if ($motif =~ /^\[/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3158 $motif =~ s/^\[//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3159 $motif =~ /([a-zA-Z]+)\].*/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3160 $firstmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3161 @stretches = split(/\]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3162 $firststretch = $stretches[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3163 #print "firststretch = $firststretch\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3164 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3165 else {$firstmotif = $motif;$firststretch = $microsat;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3166 # print "if length:firststretch - length($firststretch) < threshes length :firstmotif ($firstmotif) - $thresholds[length($firstmotif)]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3167 if (length($firststretch) < $thresholds[length($firstmotif)]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3168 return "no";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3169 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3170 else {return "yes";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3171
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3172 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3173 sub multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3174 my @fields = split(/\t/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3175 my $microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3176 $microsat =~ s/-|(\]$)//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3177 my $motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3178 chomp $motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3179 my $temp_lastmotif = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3180 my $laststretch = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3181 my @stretches=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3182
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3183
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3184 if ($motif =~ /\]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3185 $motif =~ s/\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3186 $motif =~ /.*\[([a-zA-Z]+)$/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3187 $temp_lastmotif = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3188 @stretches = split(/\[/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3189 $laststretch = pop(@stretches);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3190 #print "last stretch = $laststretch\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3191 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3192 else {$temp_lastmotif = $motif; $laststretch = $microsat;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3193
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3194 if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3195 return "no";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3196 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3197 else { return "yes";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3198
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3199
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3200 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3201 sub checking_substitutions{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3202
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3203 my ($line, $seq, $startprobes, $endprobes) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3204 #print "sequence = $seq \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3205 #print "COMMAND = \n $line, \n $seq, \n $startprobes \n, $endprobes\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3206 # <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3207 my @seqarray = split(/\s*/,$seq);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3208 my @startsubst_probes = split(/\|/,$startprobes);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3209 my @endsubst_probes = split(/\|/,$endprobes);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3210 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3211 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3212 my $start = $fields[11] - $fields[10];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3213 my $end = $fields[13] - $fields[10];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3214 my $motif = $fields[9]; #IN FUTURE, USE THIS AS A PROBE, LIKE MOTIF = $FIELDS[9].$FIELDS[9]
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3215 $motif =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3216 my $microsat = $fields[14];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3217 $microsat =~ s/\[|\]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3218 #------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3219 # GETTING START AND END PHASES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3220 my $startphase = substr($microsat,0, length($motif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3221 my $endphase = substr($microsat,-length($motif), length($motif));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3222 #print "start and end phases are - $startphase and $endphase\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3223 my $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3224 my $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3225 my $substitution_distance = length($motif);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3226 my $prestart = $start - $substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3227 my $postend = $end + $substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3228 my @endadds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3229 my @startadds = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3230 if (($prestart < 0) || ($postend > scalar(@seqarray))) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3231 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3232 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3233 #------------------------------------------------------------------------#------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3234 # CHECKING FOR SUBSTITUTION PROBES NOW
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3235
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3236 if ($fields[8] ne "mononucleotide"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3237 while ($startflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3238 my $search = join("",@seqarray[$prestart...($start-1)]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3239 #print "search is from $prestart...($start-1) = $search\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3240 foreach my $probe (@startsubst_probes){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3241 #print "\t\tprobe = $probe\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3242 if ($search =~ /^$probe/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3243 #print "\tfound addition to the left - $search \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3244 my $copyprobe = $probe;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3245 my $type;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3246 my $subspos = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3247 my $interruption = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3248 if ($search eq $startphase) { $type = "NONE";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3249 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3250 $copyprobe =~ s/\[a-zA-Z\]/^/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3251 $subspos = index($copyprobe,"^") + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3252 $type = "substitution";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3253 $interruption = substr($search, $subspos,1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3254 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3255 my $addinfo = join("\t",$prestart, $start, $search, $type, $interruption, $subspos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3256 #print "adding information: $addinfo \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3257 push(@startadds, $addinfo);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3258 $prestart = $prestart - $substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3259 $start = $start-$substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3260 $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3261
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3262 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3263 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3264 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3265 $startflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3266 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3267 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3268 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3269 #<STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3270 while ($endflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3271 my $search = join("",@seqarray[($end+1)...$postend]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3272 #print "search is from ($end+1)...$postend] = $search\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3273
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3274 foreach my $probe (@endsubst_probes){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3275 #print "\t\tprobe = $probe\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3276 if ($search =~ /$probe$/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3277 my $copyprobe = $probe;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3278 my $type;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3279 my $subspos = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3280 my $interruption = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3281 if ($search eq $endphase) { $type = "NONE";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3282 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3283 $copyprobe =~ s/\[a-zA-Z\]/^/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3284 $subspos = index($copyprobe,"^") + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3285 $type = "substitution";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3286 $interruption = substr($search, $subspos,1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3287 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3288 my $addinfo = join("\t",$end, $postend, $search, $type, $interruption, $subspos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3289 #print "adding information: $addinfo \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3290 push(@endadds, $addinfo);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3291 $postend = $postend + $substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3292 $end = $end+$substitution_distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3293 push(@endadds, $search);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3294 $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3295 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3296 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3297 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3298 $endflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3299 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3300 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3301 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3302 #print "startadds = @startadds, endadds = @endadds \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3303
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3304 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3305 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3306 sub microsat_packer{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3307 my $microsat = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3308 my $addition = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3309
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3310
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3311
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3312 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3313 sub multiSpecies_interruptedMicrosatHunter_merge{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3314 $prinkter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3315 # print "~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3316 my $line = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3317 # print "sent for mering: $line \n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3318 my @mields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3319 my @fields = @mields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3320 my $microsat = allCaps($fields[$microsatcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3321 my $motifline = allCaps($fields[$motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3322 my $microsatcopy = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3323 # print "microsat = $microsat|\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3324 $microsatcopy =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3325 chomp $microsatcopy;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3326 my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3327 my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3328 shift @inields;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3329 # print "inields =",join("|",@inields)," microields = ",join("|",@microields)," and count of microields = ", $#microields,"\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3330 $motifline =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3331 my @motields = split(/\]\[/,$motifline);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3332 my @firstmotifs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3333 my @lastmotifs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3334 for my $i (0 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3335 $firstmotifs[$i] = substr($microields[$i],0,length($motields[$i]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3336 $lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3337 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3338 # print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3339 my @mergelist = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3340 my @inter_poses = split(/,/,$fields[$interr_poscord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3341 my $no_of_interruptions = $fields[$no_of_interruptionscord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3342 my @interruptions = split(/,/,$fields[$interrcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3343 my @interrtypes = split(/,/,$fields[$interrtypecord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3344 my $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3345 for my $i (0 ... $#motields-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3346 # print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n:$lastmotifs[$i] eq $firstmotifs[$i+1]?\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3347 if ((allCaps($lastmotifs[$i]) eq allCaps($firstmotifs[$i+1])) && (!exists $inields[$i] || $inields[$i] !~ /[a-zA-Z]/)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3348 $stopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3349 push(@mergelist, ($i)."_".($i+1)); #<STDIN> if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3350 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3351 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3352
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3353 # print "mergelist = @mergelist\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3354 return $line if scalar(@mergelist) == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3355 # print "merging @mergelist\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3356 # <STDIN> if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3357
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3358 foreach my $merging (@mergelist){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3359 my @sets = split(/_/, $merging);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3360 # print "sets = @sets\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3361 my @tempmicro = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3362 my @tempmot = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3363 # print "for loop going from 0 ... ", $sets[0]-1, "\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3364 for my $i (0 ... $sets[0]-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3365 # print " adding pre- i = $i adding: microields= $microields[$i]. motields = $motields[$i], inields = |$inields[$i]|\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3366 push(@tempmicro, "[".$microields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3367 push(@tempmicro, $inields[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3368 push(@tempmot, "[".$motields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3369 # print "adding pre-motifs number $i\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3370 # print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3371 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3372 # print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3373 # print "now pushing ", "[",$microields[$sets[0]]," and ",$microields[$sets[1]],"]\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3374 my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3375 # print "middle is, from @motields - @sets, number 0 which is is\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3376 # print ": $motields[$sets[0]]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3377 push (@tempmicro, $pusher);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3378 push(@tempmot, "[".$motields[$sets[0]]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3379 push (@tempmicro, $inields[$sets[1]]) if $sets[1] != $#microields && exists $sets[1] && exists $inields[$sets[1]];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3380 my $outcoming = -2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3381 # print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3382 # print "for loop going from ",$sets[1]+1, " ... ", $#microields, "\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3383 for my $i ($sets[1]+1 ... $#microields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3384 # print " adding post- i = $i adding: microields= $microields[$i]. motields = $motields[$i]\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3385 push(@tempmicro, "[".$microields[$i]."]") if exists $microields[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3386 push(@tempmicro, $inields[$i]) unless $i == $#microields || !exists $inields[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3387 push(@tempmot, "[".$motields[$i]."]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3388 # print "adding post-motifs number $i\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3389 $outcoming = $i;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3390 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3391 # print "____________________________________________________________________________\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3392 $prinkter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3393 $fields[$microsatcord] = join("",@tempmicro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3394 $fields[$motifcord] = join("",@tempmot);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3395 # print "tempmot = @tempmot, tempmicro = @tempmicro . microsat = $fields[$microsatcord] and motif = $fields[$motifcord] \n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3396
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3397 splice(@interrtypes, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3398 $fields[$interrtypecord] = join(",",@interrtypes);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3399 splice(@interruptions, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3400 $fields[$interrcord] = join(",",@interruptions);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3401 splice(@inter_poses, $sets[0], 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3402 $fields[$interr_poscord] = join(",",@inter_poses);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3403 $no_of_interruptions = $no_of_interruptions - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3404 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3405
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3406 if ($no_of_interruptions == 0 && $line !~ /compound/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3407 $fields[$microsatcord] =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3408 $fields[$motifcord] =~ s/^\[|\]$//sg;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3409 $line = join("\t", @fields[0 ... $motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3410 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3411 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3412 $line = join("\t", @fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3413 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3414 # print "post merging, the line is $line\n" if $prinkter ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3415 #<STDIN> if $stopper ==1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3416 return $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3417 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3418 sub interval_asseser{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3419 my $pre_phase = $_[0]; my $post_phase = $_[1]; my $inter = $_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3420 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3421 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3422 sub allCaps{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3423 my $motif = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3424 $motif =~ s/a/A/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3425 $motif =~ s/c/C/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3426 $motif =~ s/t/T/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3427 $motif =~ s/g/G/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3428 return $motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3429 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3430
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3431
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3432 #xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx chromosome_unrand_breamultiSpecies_interruptedMicrosatHunterker xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3433
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3434
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3435 #xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3436 sub merge_interruptedMicrosats{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3437 # print "IN merge_interruptedMicrosats: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3438 my $input0 = $_[0]; ######looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3439 my $input1 = $_[1]; ###### the *_sput_op4_ii file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3440 my $input2 = $_[2]; ###### the *_sput_op4_ii file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3441 $no_of_species = $_[3];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3442
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3443 my $output1 = $_[1]."_separate"; #$_[3]; ###### plain microsatellite file forward
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3444 my $output2 = $_[2]."_separate"; ##$_[4]; ###### plain microsatellite file reverse
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3445 my $output3 = $_[1]."_merged"; ##$_[5]; ###### plain microsatellite file forward
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3446 #my $output4 = $_[2]."_merged"; ##$_[6]; ###### plain microsatellite file reverse
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3447 #my $info = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3448 #my @tags = split(/\t/,$info);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3449
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3450 open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3451 open(INF,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3452 open(INR,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3453 open(OUTF,">$output1") or die "Cannot open file $output1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3454 open(OUTR,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3455 open(MER,">$output3") or die "Cannot open file $output3 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3456 #open(MERR,">$output4") or die "Cannot open file $output4 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3457
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3458
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3459
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3460 $printer = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3461
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3462 # print "files opened \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3463 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3464 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3465 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3466 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3467 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3468 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3469 $typecord = $infocord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3470 my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3471
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3472 $interrtypecord = $motifcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3473 $interrcord = $motifcord + 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3474 $interr_poscord = $motifcord + 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3475 $no_of_interruptionscord = $motifcord + 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3476 $mergestarts = $no_of_interruptionscord+ 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3477 $mergeends = $no_of_interruptionscord+ 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3478 $mergemicros = $no_of_interruptionscord+ 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3479
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3480 # NOW ADDING FORWARD MICROSATELLITES TO HASH
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3481 my %fmicros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3482 my $microcounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3483 my $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3484 while (my $line = <INF>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3485 # print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3486 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3487 if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3488 my $key = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3489 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3490 push (@{$fmicros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3491 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3492 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3493 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3494 #print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3495 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3496 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3497 # print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3498 close INF;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3499 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3500 # print "done forward hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3501 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3502 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3503 # NOW ADDING REVERSE MICROSATELLITES TO HASH
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3504 my %rmicros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3505 $microcounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3506 while (my $line = <INR>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3507 # print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3508 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3509 if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3510 my $key = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3511 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3512 push (@{$rmicros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3513 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3514 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3515 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3516 #print "cant make key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3517 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3518 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3519 # print "number of reverse microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3520 close INR;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3521 # print "done reverse hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3522 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3523
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3524 #------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3525
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3526 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3527 #<STDIN> if $sine =~ /16349128/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3528 next if $sine !~ /[a-zA-Z0-9]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3529 # print "-" x 150, "\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3530 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3531 my @merged = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3532
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3533 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3534
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3535 if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3536 $key = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3537 # print $key, "<-<-<-<-<-<-<-<\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3538 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3539 # print "key = $key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3540
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3541 my @sets1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3542 my @sets2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3543 chomp $sields[$sequencepos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3544 my $rev_sequence = reverse($sields[$sequencepos]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3545 $rev_sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3546 $rev_sequence = " ".$rev_sequence;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3547 next if (!exists $fmicros{$key} && !exists $rmicros{$key});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3548
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3549 if (exists $fmicros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3550 # print "line no : $linecount\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3551 my @raw_microstring = @{$fmicros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3552 my %starts = (); my %ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3553 # print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3554 my @microstring=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3555 for my $u (0 ... $#raw_microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3556 my @tields = split(/\t/,$raw_microstring[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3557 next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3558 push(@microstring, $raw_microstring[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3559 $starts{$tields[$startcord]} = $tields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3560 $ends{$tields[$endcord]} = $tields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3561 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3562
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3563 # print "founf microstring in forward\n: @microstring\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3564 chomp @microstring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3565 my $clusterresult = (find_clusters(@microstring, $sields[$sequencepos]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3566 @sets1 = split("\=", $clusterresult);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3567 my @temp = split(/_X0X_/,$sets1[0]) ; $microscanned+= scalar(@temp);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3568 # print "sets = ", join("<all\nmerged>", @sets1), "\n<<-sets1\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3569 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3570
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3571 if (exists $rmicros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3572 # print "line no : $linecount\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3573 my @raw_microstring = @{$rmicros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3574 my %starts = (); my %ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3575 # print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3576 my @microstring=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3577 for my $u (0 ... $#raw_microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3578 my @tields = split(/\t/,$raw_microstring[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3579 next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3580 push(@microstring, $raw_microstring[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3581 $starts{$tields[$startcord]} = $tields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3582 $ends{$tields[$endcord]} = $tields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3583 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3584 # print "founf microstring in reverse\n: @microstring\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3585 chomp @microstring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3586 # print "sending reversed sequence\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3587 my $clusterresult = (find_clusters(@microstring, $rev_sequence ) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3588 @sets2 = split("\=", $clusterresult);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3589 my @temp = split(/_X0X_/,$sets2[0]) ; $microscanned+= scalar(@temp);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3590 } #if (exists $micros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3591
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3592 my @popout1 = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3593 my @popout2 = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3594 my @forwardset = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3595 if (exists $sets2[1] ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3596 if(exists $sets1[0]) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3597 push (@popout1, $sets1[0],$sets2[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3598 my @forwardset = split("=", popOuter(@popout1, $rev_sequence ));#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3599 print OUTF join("\n",split("_X0X_", $forwardset[0])), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3600 my @localmerged = split("_X0X_", $forwardset[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3601 my $sequence = $sields[$sequencepos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3602 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3603 # print "\nforwardset = @forwardset\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3604 for my $j (0 ... $#localmerged){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3605 $localmerged[$j] = invert_justCoordinates ($localmerged[$j], length($sequence));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3606 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3607
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3608 push (@merged, @localmerged);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3609
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3610 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3611 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3612 my @localmerged = split("_X0X_", $sets2[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3613 my $sequence = $sields[$sequencepos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3614 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3615 for my $j (0 ... $#localmerged){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3616 # print "\nlocalmerged = @localmerged\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3617 $localmerged[$j] = invert_justCoordinates ($localmerged[$j], length($sequence));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3618 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3619
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3620 push (@merged, @localmerged);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3621 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3622 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3623 elsif (exists $sets1[0]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3624 print OUTF join("\n",split("_X0X_", $sets1[0])), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3625 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3626
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3627 my @reverseset= ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3628 if (exists $sets1[1]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3629 if (exists $sets2[0]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3630 push (@popout2, $sets2[0],$sets1[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3631 # print "popout2 = @popout2\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3632 my @reverseset = split("=", popOuter(@popout2, $sields[$sequencepos]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3633 #print "reverseset = $reverseset[1] < --- reverseset1\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3634 print OUTR join("\n",split("_X0X_", $reverseset[0])), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3635 push(@merged, (split("_X0X_", $reverseset[1])));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3636 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3637 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3638 push(@merged, (split("_X0X_", $sets1[1])));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3639 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3640 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3641 elsif (exists $sets2[0]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3642 print OUTR join("\n",split("_X0X_", $sets2[0])), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3643
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3644 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3645
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3646 if (scalar @merged > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3647 my @filtered_merged = split("__",(filterDuplicates_merged(@merged)));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3648 print MER join("\n", @filtered_merged),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3649 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3650 # <STDIN> if $sine =~ /16349128/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3651
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3652 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3653 close(SEQ);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3654 close(INF);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3655 close(INR);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3656 close(OUTF);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3657 close(OUTR);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3658 close(MER);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3659
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3660 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3661 sub find_clusters{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3662 my @input = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3663 my $sequence = pop(@input);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3664 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3665 my @microstring0 = @input;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3666 # print "IN: find_clusters:\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3667 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3668 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3669 my @nonmerged = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3670 my @mergedSet = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3671 # print "set of microsats = @microstring \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3672 my @microstring = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @microstring0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3673 # print "microstring = ", join("\n",@microstring0) ," \n---->\n", join("\n", @microstring),"\n ,,+." if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3674 #<STDIN> if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3675 my @tempmicrostring = @microstring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3676 foreach my $line (@tempmicrostring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3677 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3678 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3679 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3680 next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3681 # print " starts >>> start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3682 push (@{$microstart{$start}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3683 push (@{$microend{$end}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3684 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3685 my $firstflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3686 while( my $line =shift(@microstring)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3687 # print "-----------\nline = $line \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3688 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3689 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3690 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3691 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3692 next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/ || $distance !~ /[0-9]+/ ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3693 my $startmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3694 my $endmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3695 # print "start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3696
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3697 delete ($microstart{$start});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3698 delete ($microend{$end});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3699 my $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3700 my $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3701 my $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3702 my $prestart = $start - $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3703 my $postend = $end + $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3704 my @compoundlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3705 my %compoundhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3706 push (@compoundlines, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3707 push (@{$compoundhash{$line}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3708 my $startrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3709 my $endrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3710
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3711 while( ($startflag eq "down") || ($endflag eq "down") ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3712 # print "prestart=$prestart, post end =$postend.. seqlen =", length($sequence)," firstflag = $firstflag \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3713 if ( (($prestart < 0) && $firstflag eq "up") || (($postend > length($sequence) && $firstflag eq "up")) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3714 # print "coming to the end of sequence,post end = $postend and sequence length =", length($sequence)," so exiting\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3715 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3716 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3717
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3718 $firstflag = "up";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3719 if ($startflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3720 for my $i ($prestart ... $end){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3721 if(exists $microend{$i}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3722 chomp $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3723 if(exists $compoundhash{$microend{$i}[0]}) {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3724 chomp $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3725 push(@compoundlines, $microend{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3726 my @tields = split(/\t/,$microend{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3727 $startmicro = $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3728 chomp $startmicro;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3729 $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3730 $startrank++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3731 # print "deleting $microend{$i}[0] and $microstart{$tields[$startcord]}[0]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3732 delete $microend{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3733 delete $microstart{$tields[$startcord]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3734 $end = $tields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3735 $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3736 $prestart = $tields[$startcord] - $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3737 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3738 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3739 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3740 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3741 $startflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3742 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3743 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3744 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3745
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3746 if ($endflag eq "down"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3747
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3748 for my $i ($start ... $postend){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3749 # print "$start ----> $i -----> $postend\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3750 if(exists $microstart{$i} ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3751 chomp $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3752 if(exists $compoundhash{$microstart{$i}[0]}) {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3753 chomp $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3754 push(@compoundlines, $microstart{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3755 my @tields = split(/\t/,$microstart{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3756 $endmicro = $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3757 $endrank++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3758 chomp $endmicro;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3759 $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3760 # print "deleting $microend{$tields[$endcord]}[0]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3761
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3762 delete $microstart{$i} if exists $microstart{$i} ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3763 delete $microend{$tields[$endcord]} if exists $microend{$tields[$endcord]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3764 # print "done\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3765
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3766 shift @microstring;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3767 $end = $tields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3768 $postend = $tields[$endcord] + $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3769 $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3770 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3771 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3772 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3773 $flag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3774 $endflag = 'up';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3775 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3776 # print "out of the if\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3777 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3778 # print "out of the for\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3779
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3780 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3781 # print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3782 } #end while( $flag eq "down")
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3783 # print "compoundlines = @compoundlines \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3784
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3785 if (scalar (@compoundlines) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3786 push(@nonmerged, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3787
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3788 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3789 if (scalar (@compoundlines) > 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3790 # print "FROM CLUSTERER\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3791 push(@mergedSet,merge_microsats(@compoundlines, $sequence) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3792 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3793 } #end foreach my $line (@microstring){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3794 # print join("\n",@mergedSet),"<-----mergedSet\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3795 #<STDIN> if scalar(@mergedSet) > 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3796 # print "EXIT: find_clusters\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3797 return (join("_X0X_",@nonmerged). "=".join("_X0X_",@mergedSet));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3798 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3799
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3800 sub custom {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3801 $a->[$startcord+1] <=> $b->[$startcord+1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3802 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3803
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3804 sub popOuter {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3805 # print "\nIN: popOuter @_\n"; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3806 my @all = split ("_X0X_",$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3807 # <STDIN> if !defined $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3808 my @merged = split ("_X0X_",$_[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3809 my $sequence = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3810 my $seqlen = length($sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3811 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3812 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3813 my @mergedSet = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3814 my @nonmerged = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3815
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3816 foreach my $line (@all){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3817 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3818 my $start = $seqlen - $fields[$startcord]+ 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3819 my $end = $seqlen - $fields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3820 push (@{$microstart{$start}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3821 push (@{$microend{$end}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3822 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3823 my $firstflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3824 my %forPopouting = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3825
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3826 while( my $line =shift(@merged)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3827 # print "\n MErgedline: $line .. startcord = $startcord ... endcord = $endcord\n" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3828 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3829 my @fields = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3830 my $start = $fields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3831 my $end = $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3832 my $startmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3833 my $endmicro = $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3834
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3835
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3836 delete ($microstart{$start});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3837 delete ($microend{$end});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3838 my $flag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3839 my $startflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3840 my $endflag = 'down';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3841 my $prestart = $start - $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3842 my $postend = $end + $distance;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3843 my @compoundlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3844 my %compoundhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3845 push (@compoundlines, $line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3846 my $startrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3847 my $endrank = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3848
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3849 # print "\nstart = $start, end = $end\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3850 # <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3851 for my $i ($start ... $end){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3852 if(exists $microend{$i}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3853 # print "\nmicrosat exists: $microend{$i}[0] microsat exists\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3854 chomp $microend{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3855 my @fields = split(/\t/,$microend{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3856 delete $microstart{$seqlen - $fields[$startcord] + 1};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3857 my $invertseq = $sequence;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3858 $invertseq =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3859 push(@compoundlines, invert_microsat($microend{$i}[0] , $invertseq ));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3860 delete $microend{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3861
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3862 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3863
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3864 if(exists $microstart{$i} ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3865 # print "\nmicrosat exists: $microstart{$i}[0] microsat exists\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3866
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3867 chomp $microstart{$i}[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3868 my @fields = split(/\t/,$microstart{$i}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3869 delete $microend{$seqlen - $fields[$endcord] + 1};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3870 my $invertseq = $sequence;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3871 $invertseq =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3872 push(@compoundlines, invert_microsat($microstart{$i}[0], $invertseq) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3873 delete $microstart{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3874 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3875 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3876
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3877 if (scalar (@compoundlines) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3878 push(@mergedSet,join("\t",@compoundlines) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3879 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3880 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3881 # print "FROM POPOUTER\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3882 push(@mergedSet, merge_microsats(@compoundlines, $sequence) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3883 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3884 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3885
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3886 foreach my $key (sort keys %microstart) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3887 push(@nonmerged,$microstart{$key}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3888 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3889
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3890 return (join("_X0X_",@nonmerged). "=".join("_X0X_",@mergedSet) );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3891 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3892
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3893
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3894
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3895 sub invert_justCoordinates{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3896 my $microsat = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3897 # print "IN invert_justCoordinates ... @_\n" ; <STDIN>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3898 chomp $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3899 my $seqLength = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3900 my @fields = split(/\t/,$microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3901 my $start = $seqLength - $fields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3902 my $end = $seqLength - $fields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3903 $fields[$startcord] = $start;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3904 $fields[$endcord] = $end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3905 $fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3906 # print "RETURNIG: ", join("\t",@fields), "\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3907 return join("\t",@fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3908 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3909
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3910 sub largest_number{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3911 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3912 my($max) = shift(@_);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3913 foreach my $temp (@_) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3914 #print "finding largest array: $maxcounter \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3915 if($temp > $max){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3916 $max = $temp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3917 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3918 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3919 return($max);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3920 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3921 sub smallest_number{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3922 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3923 my($min) = shift(@_);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3924 foreach my $temp (@_) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3925 #print "finding largest array: $maxcounter \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3926 if($temp < $min){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3927 $min = $temp;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3928 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3929 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3930 return($min);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3931 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3932
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3933
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3934 sub filterDuplicates_merged{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3935 my @merged = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3936 my %revmerged = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3937 my @fmerged = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3938 foreach my $micro (@merged) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3939 my @fields = split(/\t/,$micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3940 if ($fields[3] =~ /chr[A-Z0-9a-z]+r/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3941 my $key = join("_K0K_",$fields[1], $fields[$startcord], $fields[$endcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3942 # print "adding ... \n$key\n$micro\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3943 push(@{$revmerged{$key}}, $micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3944 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3945 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3946 # print "pushing.. $micro\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3947 push(@fmerged, $micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3948 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3949 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3950 # print "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3951 foreach my $micro (@fmerged) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3952 my @fields = split(/\t/,$micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3953 my $key = join("_K0K_",$fields[1], $fields[$startcord], $fields[$endcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3954 # print "searching for key $key\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3955 if (exists $revmerged{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3956 # print "deleting $revmerged{$key}[0]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3957 delete $revmerged{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3958 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3959 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3960 foreach my $key (sort keys %revmerged) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3961 push(@fmerged,$revmerged{$key}[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3962 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3963 # print "returning ", join("\n", @fmerged),"\n" ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3964 return join("__", @fmerged);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3965 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3966
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3967 sub invert_microsat{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3968 my $micro = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3969 chomp $micro;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3970 if ($micro =~ /chr[A-Z0-9a-z]+r/) { $micro =~ s/chr([0-9a-b]+)r/chr$1/g ;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3971 else { $micro =~ s/chr([0-9a-b]+)/chr$1r/g ; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3972 my $sequence = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3973 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3974 my $seqlen = length($sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3975 my @fields = split(/\t/,$micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3976 my $start = $seqlen - $fields[$endcord] +1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3977 my $end = $seqlen - $fields[$startcord] +1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3978 $fields[$startcord] = $start;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3979 $fields[$endcord] = $end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3980 $fields[$motifcord] = reverse_micro($fields[$motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3981 $fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3982 if ($fields[$typecord] ne "compound" && exists $fields[$no_of_interruptionscord] ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3983 my @intertypes = split(/,/,$fields[$interrtypecord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3984 my @inters = split(/,/,$fields[$interrcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3985 my @interposes = split(/,/,$fields[$interr_poscord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3986 $fields[$interrtypecord] = join(",",reverse(@intertypes));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3987 $fields[$no_of_interruptionscord] = scalar(@interposes);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3988 for my $i (0 ... $fields[$no_of_interruptionscord]-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3989 if (exists $inters[$i] && $inters[$i] =~ /[a-zA-Z]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3990 $inters[$i] = reverse($inters[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3991 $interposes[$i] = $interposes[$i] + length($inters[$i]) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3992 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3993 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3994 $inters[$i] = "";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3995 $interposes[$i] = $interposes[$i] - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3996 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3997 $interposes[$i] = ($end - $start + 1) - $interposes[$i] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3998 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
3999
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4000 $fields[$interrcord] = join(",",reverse(@inters));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4001 $fields[$interr_poscord] = join(",",reverse(@interposes));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4002 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4003
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4004 my $finalmicrosat = join("\t", @fields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4005 return $finalmicrosat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4006
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4007 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4008 sub reverse_micro{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4009 my $micro = reverse($_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4010 my @strand = split(/\s*/,$micro);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4011 for my $i (0 ... $#strand){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4012 if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4013 if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4014 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4015 return join("",@strand);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4016 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4017
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4018 #xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4019
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4020
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4021 #xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4022
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4023 sub forward_reverse_sputoutput_comparer {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4024 # print "IN forward_reverse_sputoutput_comparer: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4025 my $input0 = $_[0]; ###### the *nogap_unrand_match file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4026 my $input1 = $_[1]; ###### the real file, *sput* data
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4027 my $input2 = $_[2]; ###### the reverse file, *sput* data
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4028 my $output1 = $_[3]; ###### microsats different in real file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4029 my $output2 = $_[4]; ###### microsats missing in real file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4030 my $output3 = $_[5]; ###### microsats common among real and reverse file
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4031 my $no_of_species = $_[6];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4032
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4033 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4034 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4035 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4036 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4037 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4038 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4039 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4040 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4041 $interrtypecord = $motifcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4042 $interrcord = $motifcord + 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4043 $interr_poscord = $motifcord + 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4044 $no_of_interruptionscord = $motifcord + 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4045 $mergestarts = $no_of_interruptionscord+ 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4046 $mergeends = $no_of_interruptionscord+ 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4047 $mergemicros = $no_of_interruptionscord+ 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4048
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4049
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4050 open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4051 open(INF,"<$input1") or die "Cannot open file $input1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4052 open(INR,"<$input2") or die "Cannot open file $input2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4053
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4054 open(DIFF,">$output1") or die "Cannot open file $output1 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4055 #open(MISS,">$output2") or die "Cannot open file $output2 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4056 open(SAME,">$output3") or die "Cannot open file $output3 $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4057
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4058
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4059 # print "opened files \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4060 my $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4061 my $fcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4062 my $rcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4063
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4064 $printer = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4065 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4066 # NOW ADDING FORWARD MICROSATELLITES TO HASH
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4067 my %fmicros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4068 my $microcounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4069 while (my $line = <INF>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4070 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4071 if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4072 my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4073 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4074 push (@{$fmicros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4075 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4076 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4077 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4078 #print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4079 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4080 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4081 # print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4082 close INF;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4083 my @deletedlines = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4084 # print "done forward hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4085 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4086 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4087 # NOW ADDING REVERSE MICROSATELLITES TO HASH
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4088 my %rmicros = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4089 $microcounter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4090 while (my $line = <INR>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4091 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4092 if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4093 my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4094 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4095 push (@{$rmicros{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4096 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4097 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4098 else {}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4099 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4100 # print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4101 close INR;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4102 # print "done reverse hash \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4103 $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4104 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4105 #---------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4106 # NOW READING THE SEQUENCE FILE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4107 while(my $sine = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4108 my %microstart=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4109 my %microend=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4110 my @sields = split(/\t/,$sine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4111 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4112 if ($sine =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4113 $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4114 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4115 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4116 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4117 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4118 $printer = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4119 my $sequence = $sields[$sequencepos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4120 chomp $sequence;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4121 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4122 my @localfs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4123 my @localrs = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4124
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4125 if (exists $fmicros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4126 @localfs = @{$fmicros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4127 delete $fmicros{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4128 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4129
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4130 my %forwardstarts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4131 my %forwardends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4132
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4133 foreach my $f (@localfs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4134 my @fields = split(/\t/,$f);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4135 push (@{$forwardstarts{$fields[$startcord]}},$f);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4136 push (@{$forwardends{$fields[$endcord]}},$fields[$startcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4137 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4138
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4139 if (exists $rmicros{$key}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4140 @localrs = @{$rmicros{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4141 delete $rmicros{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4142 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4143 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4144 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4145
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4146 foreach my $r (@localrs){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4147 chomp $r;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4148 my @rields = split(/\t/,$r);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4149 # print "rields = @rields\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4150 my $reciprocalstart = length($sequence) - $rields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4151 my $reciprocalend = length($sequence) - $rields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4152 # print "reciprocal start = $reciprocalstart = ",length($sequence)," - $rields[$endcord] + 1\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4153 my $microsat = reverse_micro(all_caps($rields[$microsatcord]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4154 my @localcollection=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4155 for my $i ($reciprocalstart+1 ... $reciprocalend-1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4156 if (exists $forwardstarts{$i}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4157 push(@localcollection, $forwardstarts{$i}[0] );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4158 delete $forwardstarts{$i};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4159 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4160 if (exists $forwardends{$i}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4161 next if !exists $forwardstarts{$forwardends{$i}[0]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4162 push(@localcollection, $forwardstarts{$forwardends{$i}[0]}[0] );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4163 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4164 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4165 if (exists $forwardstarts{$reciprocalstart} && exists $forwardends{$reciprocalend}) {push(@localcollection,$forwardstarts{$reciprocalstart}[0]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4166
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4167 if (scalar(@localcollection) == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4168 print SAME invert_microsat($r,($sequence) ), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4169 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4170
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4171 elsif (scalar(@localcollection) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4172 # print "f microsat = $localcollection[0]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4173 my @lields = split(/\t/,$localcollection[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4174 $lields[$microsatcord]=all_caps($lields[$microsatcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4175 # print "comparing: $microsat and $lields[$microsatcord]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4176 # print "coordinates are: $lields[$startcord]-$lields[$endcord] and $reciprocalstart-$reciprocalend\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4177 if ($microsat eq $lields[$microsatcord]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4178 chomp $localcollection[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4179 print SAME $localcollection[0], "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4180 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4181 if ($microsat ne $lields[$microsatcord]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4182 chomp $localcollection[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4183 my $newmicro = microsatChooser(join("\t",@lields), join("\t",@rields), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4184 # print "newmicro = $newmicro\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4185 if ($newmicro =~ /[a-zA-Z]/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4186 print SAME $newmicro,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4187 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4188 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4189 print DIFF join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4190 # print join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4191 # print "@rields\n@lields\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4192 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4193 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4194 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4195 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4196 # print "multiple found for $r --> ", join("\t",@localcollection),"\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4197 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4198 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4199 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4200
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4201 close(SEQ);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4202 close(INF);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4203 close(INR);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4204 close(DIFF);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4205 close(SAME);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4206
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4207 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4208 sub all_caps{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4209 my @strand = split(/\s*/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4210 for my $i (0 ... $#strand){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4211 if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4212 if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4213 if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4214 if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4215 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4216 return join("",@strand);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4217 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4218 sub microsatChooser{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4219 my $forward = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4220 my $reverse = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4221 my $sequence = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4222 my $seqLength = length($sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4223 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4224 my @fields = split(/\t/,$forward);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4225 my @rields = split(/\t/,$reverse);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4226 my $r_start = $seqLength - $rields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4227 my $r_end = $seqLength - $rields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4228
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4229
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4230 my $f_microsat = $fields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4231 my $r_microsat = $rields[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4232
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4233 if ($fields[$typecord] =~ /\./ && $rields[$typecord] =~ /\./) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4234 return $forward if length($f_microsat) >= length($r_microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4235 return invert_microsat($reverse, $sequence) if length($f_microsat) < length($r_microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4236 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4237 return $forward if all_caps($fields[$motifcord]) eq all_caps($rields[$motifcord]) && $fields[$startcord] == $rields[$startcord] && $fields[$endcord] == $rields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4238
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4239 my $f_microsat_copy = $f_microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4240 my $r_microsat_copy = $r_microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4241 $f_microsat_copy =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4242 $r_microsat_copy =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4243
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4244 my @f_microields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4245 my @r_microields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4246 my @f_intields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4247 my @r_intields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4248
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4249 my $f_motif = $fields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4250 my $r_motif = $rields[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4251 my $f_motif_copy = $f_motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4252 my $r_motif_copy = $r_motif;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4253 $f_motif_copy =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4254 $r_motif_copy =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4255
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4256 my @f_motields = split(/\]\[/,$f_motif_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4257 my @r_motields = split(/\]\[/,$r_motif_copy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4258
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4259 my $f_purestretch = join("",@f_microields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4260 my $r_purestretch = join("",@r_microields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4261
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4262 if ($fields[$typecord]=~/nucleotide/ && $rields[$typecord]=~/nucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4263 # print "now.. studying $forward\n$reverse\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4264 if ($fields[$typecord] eq $rields[$typecord]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4265 # print "comparing motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4266
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4267 if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4268 my $subset_answer = isSubset($forward, $reverse, $seqLength);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4269 # print "subset answer = $subset_answer\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4270 return $forward if $subset_answer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4271 return invert_microsat($reverse, $sequence) if $subset_answer == 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4272 return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4273 return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4274 return $forward if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) >= length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4275 return invert_microsat($reverse, $sequence) if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) < length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4276 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence) if $subset_answer == 3 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4277 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4278 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4279 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4280 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4281 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4282 return $forward;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4283 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4284 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4285 return invert_microsat($reverse, $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4286 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4287 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4288 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4289 my $fmotlen = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4290 my $rmotlen = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4291 $fmotlen =1 if $fields[$typecord] eq "mononucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4292 $fmotlen =2 if $fields[$typecord] eq "dinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4293 $fmotlen =3 if $fields[$typecord] eq "trinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4294 $fmotlen =4 if $fields[$typecord] eq "tetranucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4295 $rmotlen =1 if $rields[$typecord] eq "mononucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4296 $rmotlen =2 if $rields[$typecord] eq "dinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4297 $rmotlen =3 if $rields[$typecord] eq "trinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4298 $rmotlen =4 if $rields[$typecord] eq "tetranucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4299
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4300 if ($fmotlen < $rmotlen){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4301 if (abs($fields[$startcord] - $r_start) <= $fmotlen || abs($fields[$endcord] - $r_end) <= $fmotlen ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4302 return $forward;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4303 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4304 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4305 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4306 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4307 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4308 if ($fmotlen > $rmotlen){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4309 if (abs($fields[$startcord] - $r_start) <= $rmotlen || abs($fields[$endcord] - $r_end) <= $rmotlen){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4310 return invert_microsat($reverse, $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4311 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4312 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4313 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4314 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4315 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4316 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4317 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4318 if ($fields[$typecord] eq "compound" && $rields[$typecord] eq "compound"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4319 # print "comparing compound motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4320 if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4321 my $subset_answer = isSubset($forward, $reverse, $seqLength);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4322 # print "subset answer = $subset_answer\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4323 return $forward if $subset_answer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4324 return invert_microsat($reverse, $sequence) if $subset_answer == 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4325 # print length($f_purestretch) ,">", length($r_purestretch)," \n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4326 return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4327 return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4328 if ($subset_answer == 3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4329 if ($fields[$startcord] < $r_start || $fields[$endcord] > $r_end){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4330 if (abs($fields[$startcord] - $r_start) < length($f_motields[0]) || abs($fields[$endcord] - $r_end) < length($f_motields[$#f_motields]) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4331 return $forward;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4332 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4333 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4334 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4335 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4336 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4337 if ($fields[$startcord] > $r_start || $fields[$endcord] < $r_end){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4338 if (abs($fields[$startcord] - $r_start) < length($r_motields[0]) || abs($fields[$endcord] - $r_end) < length($r_motields[$#r_motields]) ){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4339 return invert_microsat($reverse, $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4340 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4341 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4342 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4343 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4344 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4345 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4346 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4347 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4348 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4349 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4350 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4351 return $forward;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4352 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4353 elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4354 return invert_microsat($reverse, $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4355 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4356
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4357 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4358
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4359 if ($fields[$typecord] eq "compound" && $rields[$typecord] =~ /nucleotide/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4360 # print "one compound, one nucleotide\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4361 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4362 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4363 if ($fields[$typecord] =~ /nucleotide/ && $rields[$typecord]eq "compound"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4364 # print "one compound, one nucleotide\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4365 return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4366 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4367 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4368
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4369 sub isSubset{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4370 my $forward = $_[0]; my @fields = split(/\t/,$forward);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4371 my $reverse = $_[1]; my @rields = split(/\t/,$reverse);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4372 my $seqLength = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4373 my $r_start = $seqLength - $rields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4374 my $r_end = $seqLength - $rields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4375 # print "we have $fields[$startcord] -> $fields[$endcord] && $r_start -> $r_end\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4376 return "0" if $fields[$startcord] == $r_start && $fields[$endcord] == $r_end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4377 return "1" if $fields[$startcord] <= $r_start && $fields[$endcord] >= $r_end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4378 return "2" if $r_start <= $fields[$startcord] && $r_end >= $fields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4379 return "3";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4380 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4381
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4382 sub motifBYmotif_match{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4383 my $forward = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4384 my $reverse = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4385 $forward =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4386 $reverse =~ s/^\[|\]$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4387 my @f_motields=split(/\]\[/, $forward);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4388 my @r_motields=split(/\]\[/, $reverse);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4389 my $finalresult = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4390
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4391 if (scalar(@f_motields) != scalar(@r_motields)){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4392 my $subresult = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4393 my @mega = (); my @sub = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4394 @mega = @f_motields if scalar(@f_motields) > scalar(@r_motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4395 @sub = @f_motields if scalar(@f_motields) > scalar(@r_motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4396 @mega = @r_motields if scalar(@f_motields) < scalar(@r_motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4397 @sub = @r_motields if scalar(@f_motields) < scalar(@r_motields);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4398
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4399 for my $i (0 ... $#sub){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4400 my $probe = $sub[$i].$sub[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4401 # print "probing $probe and $mega[$i]\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4402 if ($probe =~ /$mega[$i]/) {$subresult = 1; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4403 else {$subresult = 0; last; }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4404 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4405
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4406 return 0 if $subresult == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4407 return 2 if $subresult == 1 && scalar(@f_motields) > scalar(@r_motields); # r is subset of f
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4408 return 3 if $subresult == 1 && scalar(@f_motields) < scalar(@r_motields); # ^reverse
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4409
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4410 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4411 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4412 for my $i (0 ... $#f_motields){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4413 my $probe = $f_motields[$i].$f_motields[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4414 if ($probe =~ /$r_motields[$i]/) {$finalresult = 1 ;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4415 else {$finalresult = 0 ;last;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4416 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4417 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4418 # print "finalresult = $finalresult\n" if $printer == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4419 return $finalresult;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4420 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4421
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4422 sub merge_microsats{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4423 my @input = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4424 my $sequence = pop(@input);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4425 $sequence =~ s/ //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4426 my @seq_string = @input;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4427 # print "IN: merge_microsats\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4428 # print "recieved for merging: ", join("\n", @seq_string), "\nsequence = $sequence\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4429 my $start;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4430 my $end;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4431 my @micros = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @seq_string;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4432 # print "\nrearranged into @micros \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4433 my (@motifs, @microsats, @interruptiontypes, @interruptions, @interrposes, @no_of_interruptions, @types, @starts, @ends, @mergestart, @mergeend, @mergemicro) = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4434 my @fields = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4435 for my $i (0 ... $#micros){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4436 chomp $micros[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4437 @fields = split(/\t/,$micros[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4438 push(@types, $fields[$typecord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4439 push(@motifs, $fields[$motifcord]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4440
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4441 if (exists $fields[$interrtypecord]){ push(@interruptiontypes, $fields[$interrtypecord]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4442 else { push(@interruptiontypes, "NA"); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4443 if (exists $fields[$interrcord]) {push(@interruptions, $fields[$interrcord]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4444 else { push(@interruptions, "NA"); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4445 if (exists $fields[$interr_poscord]) { push(@interrposes, $fields[$interr_poscord]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4446 else { push(@interrposes, "NA"); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4447 if (exists $fields[$no_of_interruptionscord]) {push(@no_of_interruptions, $fields[$no_of_interruptionscord]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4448 else { push(@no_of_interruptions, "NA"); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4449 if(exists $fields[$mergestarts]) { @mergestart = (@mergestart, split(/\./,$fields[$mergestarts]));}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4450 else { push(@mergestart, $fields[$startcord]); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4451 if(exists $fields[$mergeends]) { @mergeend = (@mergeend, split(/\./,$fields[$mergeends]));}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4452 else { push(@mergeend, $fields[$endcord]); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4453 if(exists $fields[$mergemicros]) { push(@mergemicro, $fields[$mergemicros]);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4454 else { push(@mergemicro, $fields[$microsatcord]); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4455
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4456
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4457 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4458 $start = smallest_number(@mergestart);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4459 $end = largest_number(@mergeend);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4460 my $microsat_entry = "[".substr( $sequence, $start-1, ($end - $start + 1) )."]";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4461 my $microsat = join("\t", @fields[0 ... $infocord], join(".", @types), $start, $fields[$strandcord], $end, $microsat_entry , join(".", @motifs), join(".", @interruptiontypes),join(".", @interruptions),join(".", @interrposes),join(".", @no_of_interruptions), join(".", @mergestart), join(".", @mergeend) , join(".", @mergemicro));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4462 return $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4463 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4464
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4465 sub slided_microsat{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4466 my $forward = $_[0]; my @fields = split(/\t/,$forward);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4467 my $reverse = $_[1]; my @rields = split(/\t/,$reverse);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4468 my $seqLength = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4469 my $r_start = $seqLength - $rields[$endcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4470 my $r_end = $seqLength - $rields[$startcord] + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4471 my $motlen =();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4472 $motlen =1 if $fields[$typecord] eq "mononucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4473 $motlen =2 if $fields[$typecord] eq "dinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4474 $motlen =3 if $fields[$typecord] eq "trinucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4475 $motlen =4 if $fields[$typecord] eq "tetranucleotide";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4476
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4477 if (abs($fields[$startcord] - $r_start) < $motlen || abs($fields[$endcord] - $r_end) < $motlen ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4478 return 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4479 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4480 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4481 return 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4482 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4483
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4484 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4485
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4486 #xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4487
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4488
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4489
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4490 #xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4491 sub new_multispecies_t10{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4492 my $input1 = $_[0]; #gap_op_unrand_match
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4493 my $input2 = $_[1]; #sput
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4494 my $output = $_[2]; #output
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4495 my $bin = $output."_bin";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4496 my $orgs = join("|",split(/\./,$_[3]));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4497 my @organisms = split(/\./,$_[3]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4498 my $no_of_species = scalar(@organisms); #3
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4499 my $t10info = $output."_info";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4500 $prinkter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4501
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4502 open (MATCH, "<$input1");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4503 open (SPUT, "<$input2");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4504 open (OUT, ">$output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4505 open (INFO, ">$t10info");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4506
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4507
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4508 sub microsat_bracketer;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4509 sub custom;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4510 my %seen = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4511 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4512 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4513 $startcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4514 $strandcord = 2 + (4*$no_of_species) + 3 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4515 $endcord = 2 + (4*$no_of_species) + 4 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4516 $microsatcord = 2 + (4*$no_of_species) + 5 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4517 $motifcord = 2 + (4*$no_of_species) + 6 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4518 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4519 #---------------------------------------------------------------------------------------------------------------#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4520 # MAKING A HASH FROM SPUT, WITH HASH KEYS GENERATED BELOW AND SEQUENCES STORED AS VALUES #
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4521 #---------------------------------------------------------------------------------------------------------------#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4522 my $linecounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4523 my $microcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4524 while (my $line = <SPUT>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4525 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4526 # print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4527 next if $line !~ /[0-9a-z]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4528 $linecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4529 # my $key = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4530 # print $key, "#-#-#-#-#-#-#-#\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4531 if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4532 my $key = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4533 # print "key = $key\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4534 push (@{$seen{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4535 $microcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4536 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4537 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4538 #print "could not make ker in SPUT : \n$line \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4539 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4540 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4541 # print "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4542 # print INFO "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4543 close SPUT;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4544
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4545 #----------------------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4546
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4547 #-------------------------------------------------------------------------------------------------------#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4548 # THE ENTIRE CODE BELOW IS DEVOTED TO GENERATING HASH KEYS FROM MATCH FOLLOWED BY #
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4549 # USING THESE HASH KEYS TO CORRESPOND EACH SEQUENCE IN FIRST FILE TO ITS MICROSAT REPEATS IN #
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4550 # SECOND FILE FOLLOWED BY #
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4551 # FINDING THE EXACT LOCATION OF EACH MICROSAT REPEAT WITHIN EACH SEQUENCE USING THE 'index' FUNCTION #
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4552 #-------------------------------------------------------------------------------------------------------#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4553 my $ref = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4554 my $ref2 = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4555 my $ref3 = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4556 my $ref4 = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4557 my $deletes= 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4558 my $duplicates = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4559 my $neighbors = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4560 my $tooshort = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4561 my $prevmicrol=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4562 my $startnotfound = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4563 my $matchkeysformed = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4564 my $keysused = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4565
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4566 while (my $line = <MATCH>) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4567 # print colored ['magenta'], $line if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4568 next if $line !~ /[a-zA-Z0-9]/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4569 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4570 my @fields2 = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4571 my $key2 = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4572 # $key2 = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4573 if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4574 $matchkeysformed++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4575 $key2 = join("\t",$1, $2, $3, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4576 # print "key = $key2 \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4577 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4578 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4579 # print "could not make ker in SEQ : $line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4580 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4581 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4582 my $sequence = $fields2[$sequencepos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4583 $sequence =~ s/\*/-/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4584 my $count = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4585 if (exists $seen{$key2}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4586 $keysused++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4587 my @unsorted_raw = @{$seen{$key2}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4588 delete $seen{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4589 my @sequencearr = split(/\s*/, $sequence);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4590
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4591 # print "sequencearr = @sequencearr\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4592
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4593 my $counter;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4594
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4595 my %start_database = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4596 my %end_database = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4597 foreach my $uns (@unsorted_raw){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4598 my @uields = split(/\t/,$uns);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4599 $start_database{$uields[$startcord]} = $uns;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4600 $end_database{$uields[$endcord]} = $uns;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4601 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4602
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4603 my @unsorted = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4604 my %starts = (); my %ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4605 # print colored ['yellow'],"unsorted, unfiltered microats = \n" if $prinkter == 1; foreach (@unsorted_raw) {print colored ['blue'],$_,"\n" if $prinkter == 1;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4606 for my $u (0 ... $#unsorted_raw){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4607 my @tields = split(/\t/,$unsorted_raw[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4608 next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4609 push(@unsorted, $unsorted_raw[$u]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4610 $starts{$tields[$startcord]} = $unsorted_raw[$u];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4611 # print "in starts : $tields[$startcord] -> $unsorted_raw[$u]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4612 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4613
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4614 my $basecounter= 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4615 my $gapcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4616 my $poscounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4617
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4618 for my $s (@sequencearr){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4619
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4620 $poscounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4621 if ($s eq "-"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4622 $gapcounter++; next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4623 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4624 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4625 $basecounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4626 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4627
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4628
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4629 #print "s = $s, poscounter = $poscounter, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4630 #print "s = $s, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4631 #print "s = $s, gapcpunter = $gapcounter\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4632
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4633 if (exists $starts{$basecounter}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4634 my $locus = $starts{$basecounter};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4635 # print "locus identified = $locus\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4636 my @fields3 = split(/\t/,$locus);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4637 my $start = $fields3[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4638 my $end = $fields3[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4639 my $motif = $fields3[$motifcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4640 my $microsat = $fields3[$microsatcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4641 my @leftbracketpos = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4642 my @rightbracketpos = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4643 my $bracket_picker = 'no';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4644 my $leftbrackets=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4645 my $rightbrackets = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4646 my $micro_cpy = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4647 # print "microsat = $microsat\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4648 while($microsat =~ m/\[/g) {push(@leftbracketpos, (pos($microsat))); $leftbrackets = join("__",@leftbracketpos);$bracket_picker='yes';}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4649 while($microsat =~ m/\]/g) {push(@rightbracketpos, (pos($microsat))); $rightbrackets = join("__",@rightbracketpos);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4650 $microsat =~ s/[\[\]\-\*]//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4651 # print "microsat = $microsat\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4652 my $human_search = join '-*', split //, $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4653 my $temp = substr($sequence, $poscounter-1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4654 # print "with poscounter = $poscounter\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4655 my $search_result = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4656 my $posnow = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4657
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4658 # print "for $line, temp $temp or human_search $human_search not defined\n" if !defined $temp || !defined $human_search;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4659 # <STDIN> if !defined $temp || !defined $human_search;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4660
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4661 while ($temp =~ /($human_search)/gi){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4662 $search_result = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4663 # $posnow = pos($temp);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4664 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4665 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4666
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4667 my @gapspos = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4668 next if !defined $search_result;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4669
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4670 while($search_result =~ m/-/g) {push(@gapspos, (pos($search_result))); }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4671 my $gaps = join("__",@gapspos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4672
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4673 my $final_microsat = $search_result;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4674 if ($bracket_picker eq "yes"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4675 $final_microsat = microsat_bracketer($search_result, $gaps,$leftbrackets,$rightbrackets);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4676 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4677
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4678 my $outsentence = join("\t",join ("\t",@fields3[0 ... $infocord]),$fields3[$typecord],$fields3[$motifcord],$gapcounter,$poscounter,$fields3[$strandcord],$poscounter + length($search_result) -1 ,$final_microsat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4679
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4680 if ($bracket_picker eq "yes") {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4681 $outsentence = $outsentence."\t".join("\t",@fields3[($motifcord+1) ... $#fields3]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4682 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4683 print OUT $outsentence,"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4684 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4685 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4686 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4687 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4688 my $unusedkeys = scalar(keys %seen);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4689 # print INFO "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4690 # print INFO "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined in +-10 bp neighboring region: $neighbors \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4691 # print INFO "microsatellites too short = $tooshort\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4692 # print INFO "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4693
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4694 #print "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4695 #print "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined in +-10 bp neighboring region: $neighbors \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4696 #print "microsatellites too short = $tooshort\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4697 #print "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4698 #print "unused keys = \n",join("\n", (keys %seen)),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4699 close (MATCH);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4700 close (SPUT);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4701 close (OUT);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4702 close (INFO);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4703 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4704
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4705 sub microsat_bracketer{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4706 # print "in bracketer: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4707 my ($microsat, $gapspos, $leftbracketpos, $rightbracketpos) = @_;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4708 my @gaps = split(/__/,$gapspos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4709 my @lefts = split(/__/,$leftbracketpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4710 my @rights = split(/__/,$rightbracketpos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4711 my @new=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4712 my $pure = $microsat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4713 $pure =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4714 my $off = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4715 my $finallength = length($microsat) + scalar(@lefts)+scalar(@rights);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4716 push(@gaps, 0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4717 push(@lefts,0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4718 push(@rights,0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4719
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4720 for my $i (1 ... $finallength){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4721 # print "1 current i = >$i<>, right = >$rights[0]< gap = $gaps[0] left = >$lefts[0]< and $rights[0] == $i\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4722 if($rights[0] == $i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4723 # print "pushed a ]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4724 push(@new, "]");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4725 shift(@rights);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4726 push(@rights,0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4727 for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4728 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4729 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4730 if($gaps[0] == $i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4731 # print "pushed a -\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4732 push(@new, "-");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4733 shift(@gaps);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4734 push(@gaps, 0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4735 for my $j (0 ... scalar(@rights)-1) {$rights[$j]++;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4736 for my $j (0 ... scalar(@lefts)-1) {$lefts[$j]++;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4737
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4738 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4739 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4740 if($lefts[0] == $i){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4741 # print "pushed a [\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4742 push(@new, "[");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4743 shift(@lefts);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4744 push(@lefts,0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4745 for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4746 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4747 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4748 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4749 my $pushed = substr($pure,$off,1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4750 $off++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4751 push(@new,$pushed );
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4752 # print "pushed an alphabet, now new = @new, pushed = $pushed\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4753 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4754 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4755 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4756 my $returnmicrosat = join("",@new);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4757 # print "final microsat = $returnmicrosat \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4758 return($returnmicrosat);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4759 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4760
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4761 #xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4762
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4763
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4764 #xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4765 sub multiSpecies_orthFinder4{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4766 #print "IN multiSpecies_orthFinder4: @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4767 my @handles = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4768 #1 SEPT 30TH 2008
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4769 #2 THIS CODE (multiSpecies_orthFinder4.pl) IS BEING MADE SO THAT IN THE REMOVAL OF MICROSATELLITES THAT ARE CLOSER TO EACH OTHER
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4770 #3 THAN 50 BP (HE 50BP RADIUS OF EXCLUSION), WE ARE LOOKING ACCROSS ALIGNMENT BLOCKS.. AND NOT JUST LOOKING WITHIN THE ALIGNMENT BLOCKS. THIS WILL
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4771 #4 POTENTIALLY REMOVE EVEN MORE MICROSATELLITES THAN BEFORE, BUT THIS WILL RESCUE THOSE MICROSATELLITES THAT WERE LOST
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4772 #5 DUE TO OUR PREVIOUS REQUIREMENT FROM VERSION 3, THAT MICROSATELLITES THAT ARE CLOSER TO THE BOUNDARY THAN 25 BP NEED TO BE REMOVED
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4773 #6 SUCH A REQUIREMENT WAS A CRUDE WAY TO IMPOSE THE ABOVE 50 BP RADIUS OF EXCLUSION ACCROSS THE ALIGNMENT BLOCKS WITHOUT ACTUALLY
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4774 #7 CHECKING COORDINATES OF THE EXCLUDED MICROSATELLITES.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4775 #8 IN ORDER TO TAKE CARE OF THE CASES WHERE MICROSATELLITES ARE PRELIOUSLY CLOSE TO ENDS OF THE ALIGNMENT BLOCKS, WE IMPOSE HERE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4776 #9 A NEW REQUIREMENT THAT FOR A MICROSATELLITE TO BE CONSIDERED, ALL THE SPECIES NEED TO HAVE AT LEAST 10 BP OF NON-MICROSATELLITE SEQUENCE
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4777 #10 ON EITHER SIDE OF IT.. GAPLESS. THIS INFORMATION IS STORED IN THE VARIABLE: $FLANK_SUPPORT. THIS PART, INSTEAD OF BEING INCLUDED IN
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4778 #11 THIS CODE, WILL BE INCLUDED IN A NEW CODE THAT WE WILL BE WRITING AS PART OF THE PIPELINE: multiSpecies_microsatSetSelector.pl
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4779
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4780 #1 trial run:
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4781 #2 perl ../../../codes/multiSpecies_orthFinder4.pl /gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr22.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt H.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:C.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:O.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:R.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:M.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2 orth22 hg18:panTro2:ponAbe2:rheMac2:calJac1 50
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4782
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4783 $prinkter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4784
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4785 #############
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4786 my $CLUSTER_DIST = $_[4];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4787 #############
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4788
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4789
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4790 my $aligns = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4791 my @micros = split(/:/, $_[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4792 my $orth = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4793 #my $not_orth = "notorth";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4794 @tags = split(/:/, $_[3]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4795
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4796 $no_of_species=scalar(@tags);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4797 my $junkfile = $orth."_junk";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4798 #open(JUNK,">$junkfile");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4799
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4800 #my $info = $output1."_info";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4801 #print "inputs are : \n"; foreach(@micros){print $_,"\n";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4802 #print "info = @_\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4803
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4804
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4805 open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4806 open (ORTH, ">$orth");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4807 my $output=$orth."_out";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4808 open (OUTP, ">$output");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4809
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4810
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4811 #open (NORTH, ">$not_orth");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4812 #open (INF, ">$info");
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4813 my $i = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4814 foreach my $path (@micros){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4815 $handles[$i] = IO::Handle->new();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4816 open ($handles[$i], "<$path") or die "Can't open microsat file $path : $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4817 $i++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4818 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4819
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4820 #print "Opened files\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4821
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4822
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4823 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4824 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4825 $motifcord = $typecord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4826 $gapcord = $motifcord+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4827 $startcord = $gapcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4828 $strandcord = $startcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4829 $endcord = $strandcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4830 $microsatcord = $endcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4831 $sequencepos = 2 + (4*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4832 #$sequencepos = 17;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4833 # GENERATING HASHES CONTAINING CHIMP AND HUMAN DATA FROM ABOVE FILES
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4834 #----------------------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4835 my @hasharr = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4836 foreach my $path (@micros){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4837 open(READ, "<$path") or die "Cannot open file $path :$!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4838 my %single_hash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4839 my $key = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4840 my $counter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4841 while (my $line = <READ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4842 $counter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4843 # print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4844 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4845 my @fields1 = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4846 if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4847 $key = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4848
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4849 # print "key = : $key\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4850
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4851 # print $line if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4852 push (@{$single_hash{$key}},$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4853 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4854 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4855 # print "microsat line incompatible\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4856 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4857 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4858 push @hasharr, {%single_hash};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4859 # print "@{$single_hash{$key}} \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4860 # print "done $path: counter = $counter\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4861 close READ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4862 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4863 # print "Done hashes\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4864 #----------------------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4865 my $question=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4866 #----------------------------------------------------------------------------------------------------------------
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4867 my @contigstarts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4868 my @contigends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4869
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4870 my %contigclusters = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4871 my %contigclustersFirstStartOnly=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4872 my %contigclustersLastEndOnly=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4873 my %contigclustersLastEndLengthOnly=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4874 my %contigclustersFirstStartLengthOnly=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4875 my %contigpath=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4876 my $dotcounter = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4877 while (my $line = <BO>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4878 # print "x" x 60, "\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4879 $dotcounter++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4880 # print "." if $dotcounter % 100 ==0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4881 # print "\n" if $dotcounter % 5000 ==0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4882 next if $line !~ /^[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4883 # print $line if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4884 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4885 my @fields2 = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4886 my $key2 = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4887 if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4888 $key2 = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4889 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4890 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4891 # print "seq line $line incompatible\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4892 next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4893
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4894
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4895
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4896
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4897
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4898
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4899 my @sequences = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4900 for (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4901 my $seq = <BO>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4902 # print $seq;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4903 chomp $seq;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4904 push(@sequences , " ".$seq);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4905 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4906 my @origsequences = @sequences;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4907 my $seqcopy = $sequences[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4908 my @strings = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4909 $seqcopy =~ s/[a-zA-Z]|-/x/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4910 my @string = split(/\s*/,$seqcopy);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4911
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4912 for my $s (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4913 $sequences[$s] =~ s/-//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4914 $sequences[$s] =~ s/[a-zA-Z]/x/g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4915 # print "length of sequence = ",length($sequences[$s]),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4916 my @tempstring = split(/\s*/,$sequences[$s]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4917 push(@strings, [@tempstring])
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4918
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4919 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4920
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4921 my @species_list = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4922 my @micro_count = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4923 my @starthash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4924 my $stopper = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4925 my @endhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4926
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4927 my @currentcontigstarts=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4928 my @currentcontigends=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4929 my @currentcontigchrs=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4930
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4931 for my $i (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4932 # print "searching for : if exists hasharr: $i : $tags[$i] : $key2 \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4933 my @temparr = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4934
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4935 if (exists $hasharr[$i]{$key2}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4936 @temparr = @{$hasharr[$i]{$key2}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4937
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4938 $line =~ /$tags[$i]\s([a-zA-Z0-9_]+)\s([0-9]+)\s([0-9]+)/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4939 ## print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9_])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4940 # print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4941 my $startkey = $1."_SK0SK_".$2; #print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4942 my $endkey = $1."_EK0EK_".$3; #print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4943 $contigstarts[$i]{$startkey}= $key2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4944 $contigends[$i]{$endkey}= $key2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4945 # print "confirming existance: \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4946 # print "present \n" if exists $contigends[$i]{$endkey} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4947 # print "absent \n" if !exists $contigends[$i]{$endkey} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4948 $currentcontigchrs[$i]=$1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4949 $currentcontigstarts[$i]=$2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4950 $currentcontigends[$i]=$3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4951
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4952 } # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4953 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4954 push (@starthash, {0 => "0"});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4955 push (@endhash, {0 => "0"});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4956 $currentcontigchrs[$i] = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4957 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4958 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4959 $stopper = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4960 # print "exists: @temparr\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4961 push(@micro_count, scalar(@temparr));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4962 push(@species_list, [@temparr]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4963 my @tempstart = (); my @tempend = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4964 my %localends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4965 my %localhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4966 # print "---------------------------\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4967
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4968 foreach my $templine (@temparr){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4969 # print "templine = $templine\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4970 my @tields = split(/\t/,$templine);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4971 my $start = $tields[$startcord]; # - $tields[$gapcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4972 my $end = $tields[$endcord]; #- $tields[$gapcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4973 my $realstart = $tields[$startcord]- $tields[$gapcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4974 my $gapsinmicrosat = ($tields[$microsatcord] =~ s/-/-/g);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4975 $gapsinmicrosat = 0 if $gapsinmicrosat !~ /[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4976 # print "infocord = $infocord typecord = $typecord motifcord = $motifcord gapcord = $gapcord startcord = $startcord strandcord = $strandcord endcord = $endcord microsatcord = $microsatcord sequencepos = $sequencepos\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4977 my $realend = $tields[$endcord]- $tields[$gapcord]- $gapsinmicrosat;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4978 # print "real start = $realstart, realend = $realend \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4979 for my $pos ($realstart ... $realend){ $strings[$i][$pos] = $strings[$i][$pos].",".$i.":".$start."-".$end;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4980 push(@tempstart, $start);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4981 push(@tempend, $end);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4982 $localhash{$start."-".$end} = $templine;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4983 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4984 push @starthash, {%localhash};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4985 my $foundclusters =findClusters(join("!",@{$strings[$i]}), $CLUSTER_DIST);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4986
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4987 # print "foundclusters = $foundclusters\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4988
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4989 my @clusters = split(/_/,$foundclusters);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4990
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4991 my $clustno = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4992
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4993 foreach my $cluster (@clusters) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4994 my @constituenst = split(/,/,$cluster);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4995 # print "clusters returned: @constituenst\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4996 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4997
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4998 @string = split("_S0S_",stringPainter(join("_C0C_",@string),$foundclusters));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
4999
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5000
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5001 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5002 next if $stopper == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5003
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5004 # print colored ['blue'],"FINAL:\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5005 my $finalclusters =findClusters(join("!",@string), 1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5006 # print "finalclusters = $finalclusters\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5007 # print colored ['blue'],"----------------------\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5008 my @clusters = split(/,/,$finalclusters);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5009 # print "@string\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5010 # print "@clusters\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5011 # print "------------------------------------------------------------------\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5012
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5013 my $clustno = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5014
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5015 # foreach my $cluster (@clusters) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5016 # my @constituenst = split(/,/,$cluster);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5017 # print "clusters returned: @constituenst\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5018 # }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5019
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5020 next if (scalar @clusters == 0);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5021
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5022 my @contigcluster=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5023 my $clusterno=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5024 my @contigClusterstarts=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5025 my @contigClusterends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5026
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5027 foreach my $clust (@clusters){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5028 # print "cluster: $clust\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5029 $clusterno++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5030 my @localclust = split(/\./, $clust);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5031 my @result = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5032 my @starts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5033 my @ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5034
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5035 for my $i (0 ... $#localclust){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5036 # print "localclust[$i]: $localclust[$i]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5037 my @pattern = split(/:/, $localclust[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5038 my @cords = split(/-/, $pattern[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5039 push (@starts, $cords[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5040 push (@ends, $cords[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5041 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5042
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5043 my $extremestart = smallest_number(@starts);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5044 my $extremeend = largest_number(@ends);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5045 push(@contigClusterstarts, $extremestart);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5046 push(@contigClusterends, $extremeend);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5047 # print "cluster starts from $extremestart and ends at $extremeend \n" if $prinkter == 1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5048
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5049 foreach my $clustparts (@localclust){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5050 my @pattern = split(/:/, $clustparts);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5051 # print "printing from pattern: $pattern[1]: $starthash[$pattern[0]]{$pattern[1]}\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5052 push (@result, $starthash[$pattern[0]]{$pattern[1]});
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5053 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5054 push(@contigcluster, join("\t", @result));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5055 # print join("\t", @result),"<-result \n" if $prinkter == 1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5056 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5057
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5058
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5059 my $firstclusterstart = smallest_number(@contigClusterstarts);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5060 my $lastclusterend = largest_number(@contigClusterends);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5061
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5062
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5063 $contigclustersFirstStartOnly{$key2}=$firstclusterstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5064 $contigclustersLastEndOnly{$key2} = $lastclusterend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5065 $contigclusters{$key2}=[ @contigcluster ];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5066 # print "currentcontigchr are @currentcontigchrs , firstclusterstart = $firstclusterstart, lastclusterend = $lastclusterend\n " if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5067 for my $i (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5068 #1 check if there exists adjacent alignment block wrt coordinates of this species.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5069 next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block..
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5070 #2 no need to worry about proximity of anything in adjacent block!
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5071
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5072 #1 BELOW, the following is really to calclate the distance between the end coordinate of the
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5073 #2 cluster and the end of the gap-free sequence of each species. this is so that if an
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5074 #3 adjacent alignment block is found lateron, the exact distance between the potentially
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5075 #4 adjacent microsat clusters can be found here. the exact start coordinate will be used
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5076 #5 immediately below.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5077 # print "full sequence = $origsequences[$i] and its length = ",length($origsequences[$i])," \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5078
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5079 my $species_startsubstring = substr($origsequences[$i], 0, $firstclusterstart);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5080 my $species_endsubstring = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5081
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5082 if (length ($origsequences[$i]) <= $lastclusterend+1){ $species_endsubstring = "";}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5083 else{ $species_endsubstring = substr($origsequences[$i], $lastclusterend+1);}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5084
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5085 # print "\nnot defined species_endsubstring...\n" if !defined $species_endsubstring && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5086 # print "for species: $tags[$i]: \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5087
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5088 $species_startsubstring =~ s/-| //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5089 $species_endsubstring =~ s/-| //g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5090 $contigclustersLastEndLengthOnly{$key2}[$i]=length($species_endsubstring);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5091 $contigclustersFirstStartLengthOnly{$key2}[$i]=length($species_startsubstring);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5092
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5093
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5094
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5095 # print "species_startsubstring = $species_startsubstring, and its length =",length($species_startsubstring)," \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5096 # print "species_endsubstring = $species_endsubstring, and its length =",length($species_endsubstring)," \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5097 # print "attaching to contigclustersLastEndOnly: $key2: $i\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5098
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5099 # print "just confirming: $contigclustersLastEndLengthOnly{$key2}[$i] \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5100
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5101 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5102
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5103
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5104 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5105 # print "\ndone the job of filling... \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5106 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5107 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5108 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5109 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5110 $prinkter=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5111 open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5112
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5113 my %clusteringpaths=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5114 my %clustersholder=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5115 my %foundkeys=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5116 my %clusteringpathsRev=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5117
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5118
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5119 my $totalcount=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5120 my $founkeys_enteredcount=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5121 my $transfered=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5122 my $complete_transfered=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5123 my $plain_transfered=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5124 my $existing_removed=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5125
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5126 while (my $line = <BO>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5127 # print "x" x 60, "\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5128 next if $line !~ /^[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5129 #print $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5130 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5131 my @fields2 = split(/\t/,$line);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5132 my $key2 = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5133 if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)/ ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5134 $key2 = join("\t",$1, $2, $4, $5);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5135 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5136
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5137 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5138 # print "seq line $line incompatible\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5139 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5140 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5141 # print "KEY = : $key2\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5142
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5143
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5144 my @currentcontigstarts=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5145 my @currentcontigends=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5146 my @currentcontigchrs=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5147 my @clusters = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5148 my @clusterscopy=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5149 if (exists $contigclusters{$key2}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5150 @clusters = @{$contigclusters{$key2}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5151 @clusterscopy=@clusters;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5152 for my $i (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5153 # print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5154 if ($line =~ /$tags[$i]\s([a-zA-Z0-9_]+)\s([0-9]+)\s([0-9]+)/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5155 # print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5156 my $startkey = $1."_S0E_".$2; #print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5157 my $endkey = $1."_S0E_".$3; #print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5158 $currentcontigchrs[$i]=$1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5159 $currentcontigstarts[$i]=$2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5160 $currentcontigends[$i]=$3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5161 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5162 else {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5163 $currentcontigchrs[$i] = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5164 # print "no microsat clusters for $key2\n" if $prinkter == 1; next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5165 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5166 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5167 } # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5168
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5169 my @sequences = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5170 for (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5171 my $seq = <BO>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5172 # print $seq;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5173 chomp $seq;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5174 push(@sequences , " ".$seq);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5175 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5176
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5177 next if scalar @currentcontigchrs == 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5178
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5179 # print "contigchrs= @currentcontigchrs \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5180 my %visitedcontigs=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5181
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5182 for my $i (0 ... $#tags){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5183 #1 check if there exists adjacent alignment block wrt coordinates of this species.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5184 next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block..
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5185 #2 no need to worry about proximity of anything in adjacent block!
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5186 @clusters=@clusterscopy;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5187 #1 BELOW, the following is really to calclate the distance between the end coordinate of the
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5188 #2 cluster and the end of the gap-free sequence of each species. this is so that if an
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5189 #3 adjacent alignment block is found lateron, the exact distance between the potentially
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5190 #4 adjacent microsat clusters can be found here. the exact start coordinate will be used
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5191 #5 immediately below.
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5192 my $firstclusterstart = $contigclustersFirstStartOnly{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5193 my $lastclusterend = $contigclustersLastEndOnly{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5194
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5195 my $key3 = $currentcontigchrs[$i]."_S0E_".($currentcontigstarts[$i]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5196 # print "check if exists $key3 in contigends for $i\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5197
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5198 if (exists($contigends[$i]{$key3}) && !exists $visitedcontigs{$contigends[$i]{$key3}}){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5199 $visitedcontigs{$contigends[$i]{$key3}} = $contigends[$i]{$key3}; #1 this array keeps track of adjacent contigs that we have already visited, thus saving computational time and potential redundancies#
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5200 # print "just checking the hash visitedcontigs: ",$visitedcontigs{$contigends[$i]{$key3}} ,"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5201
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5202 #1 extract coordinates of the last cluster of this found alignment block
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5203 # print "key of the found alignment block = ", $contigends[$i]{$key3},"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5204 # print "we are trying to mine: contigclustersAllLastEndLengthOnly_raw: $contigends[$i]{$key3}: $i \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5205 # print "EXISTS\n" if exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5206 # print "does NOT EXIST\n" if !exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5207 my @contigclustersAllFirstStartLengthOnly_raw=@{$contigclustersFirstStartLengthOnly{$key2}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5208 my @contigclustersAllLastEndLengthOnly_raw=@{$contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5209 my @contigclustersAllFirstStartLengthOnly=(); my @contigclustersAllLastEndLengthOnly=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5210
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5211 for my $val (0 ... $#contigclustersAllFirstStartLengthOnly_raw){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5212 # print "val = $val\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5213 if (defined $contigclustersAllFirstStartLengthOnly_raw[$val]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5214 push(@contigclustersAllFirstStartLengthOnly, $contigclustersAllFirstStartLengthOnly_raw[$val]) if $contigclustersAllFirstStartLengthOnly_raw[$val] =~ /[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5215 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5216 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5217 # print "-----\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5218 for my $val (0 ... $#contigclustersAllLastEndLengthOnly_raw){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5219 # print "val = $val\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5220 if (defined $contigclustersAllLastEndLengthOnly_raw[$val]){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5221 push(@contigclustersAllLastEndLengthOnly, $contigclustersAllLastEndLengthOnly_raw[$val]) if $contigclustersAllLastEndLengthOnly_raw[$val] =~ /[0-9]+/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5222 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5223 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5224
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5225
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5226 # print "our two arrays are: starts = <@contigclustersAllFirstStartLengthOnly> ......... and ends = <@contigclustersAllLastEndLengthOnly>\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5227 # print "the last cluster's end in that one is: ",smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly)," = ", smallest_number(@contigclustersAllFirstStartLengthOnly)," + ",smallest_number(@contigclustersAllLastEndLengthOnly),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5228
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5229 # if ($contigclustersFirstStartLengthOnly{$key2}[$i] + $contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}[$i] < 50){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5230 if (smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly) < $CLUSTER_DIST){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5231 my @regurgitate = @{$contigclusters{$contigends[$i]{$key3}}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5232 $regurgitate[$#regurgitate]=~s/\n//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5233 $regurgitate[$#regurgitate] = $regurgitate[$#regurgitate]."\t".shift(@clusters);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5234 delete $contigclusters{$contigends[$i]{$key3}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5235 $contigclusters{$contigends[$i]{$key3}}=[ @regurgitate ];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5236 delete $contigclusters{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5237 $contigclusters{$key2}= [ @clusters ] if scalar(@clusters) >0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5238 $contigclusters{$key2}= [ "" ] if scalar(@clusters) ==0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5239
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5240 if (scalar(@clusters) < 1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5241 # print "$key2-> $clusteringpaths{$key2} in the loners\n" if exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5242 $clusteringpaths{$key2}=$contigends[$i]{$key3};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5243 $clusteringpathsRev{$contigends[$i]{$key3}}=$key2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5244 print OUTP "$contigends[$i]{$key3} -> $clusteringpathsRev{$contigends[$i]{$key3}}\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5245 # print " clusteringpaths $key2 -> $contigends[$i]{$key3}\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5246 $founkeys_enteredcount-- if exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5247 $existing_removed++ if exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5248 # print "$key2->",@{$contigclusters{$key2}},"->>$foundkeys{$key2}\n" if exists $foundkeys{$key2} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5249 delete $foundkeys{$key2} if exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5250 $complete_transfered++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5251 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5252 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5253 print OUTP "$key2-> 0 not so lonely\n" if !exists $clusteringpathsRev{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5254 $clusteringpaths{$key2}=$key2 if !exists $clusteringpaths{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5255 $clusteringpathsRev{$key2}=0 if !exists $clusteringpathsRev{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5256
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5257 $founkeys_enteredcount++ if !exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5258 $foundkeys{$key2} = $key2 if !exists $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5259 # print "adding foundkeys entry $foundkeys{$key2}\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5260 $transfered++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5261 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5262 #$contigclusters{$key2}=[ @contigcluster ];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5263 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5264 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5265 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5266 # print "adjacent block with species $tags[$i] does not exist\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5267 $plain_transfered++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5268 print OUTP "$key2-> 0 , going straight\n" if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5269 $clusteringpaths{$key2}=$key2 if exists $contigclusters{$key2} && !exists $clusteringpaths{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5270 $clusteringpathsRev{$key2}=0 if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5271 $founkeys_enteredcount++ if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5272 $foundkeys{$key2} = $key2 if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5273 # print "adding foundkeys entry $foundkeys{$key2}\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5274
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5275 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5276 $totalcount++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5277
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5278 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5279
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5280
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5281 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5282 close BO;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5283 #close (NORTH);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5284 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5285 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5286 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5287 #///////////////////////////////////////////////////////////////////////////////////////
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5288
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5289 my $founkeys_count=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5290 my $nopath_count=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5291 my $pathed_count=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5292 foreach my $key2 (keys %foundkeys){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5293 #print "x" x 60, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5294 # print "x" if $dotcounter % 100 ==0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5295 # print "\n" if $dotcounter % 5000 ==0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5296 $founkeys_count++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5297 my $key = $key2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5298 # print "$key2 -> $clusteringpaths{$key2}\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5299 if ($clusteringpaths{$key} eq $key){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5300 # print "printing hit the alignment block immediately... no path needed\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5301 $nopath_count++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5302 delete $foundkeys{$key2};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5303 print ORTH join ("\n",@{$contigclusters{$key2}}),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5304 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5305 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5306 my @pool=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5307 my $key3=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5308 $pathed_count++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5309 # print "going reverse... clusteringpathsRev, $key = $clusteringpathsRev{$key}\n" if exists $clusteringpathsRev{$key} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5310 # print "going reverse... clusteringpathsRev $key does not exist\n" if !exists $clusteringpathsRev{$key} && $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5311 if ($clusteringpathsRev{$key} eq "0") {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5312 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5313 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5314 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5315 my $yek3 = $clusteringpathsRev{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5316 my $yek = $key;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5317 # print "caught in the middle of a path, now goin down from $yek to $yek3, which is $clusteringpathsRev{$key} \n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5318 while ($yek3 ne "0"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5319 # print "$yek->$yek3," if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5320 $yek = $yek3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5321 $yek3 = $clusteringpathsRev{$yek};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5322 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5323 # print "\nfinally reached the end of path: $yek3, and the next in line is $yek, and its up-route is $clusteringpaths{$yek}\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5324 $key3 = $clusteringpaths{$yek};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5325 $key = $yek;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5326 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5327
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5328 # print "now that we are at bottom of the path, lets start climbing up again\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5329
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5330 while($key ne $key3){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5331 # print "KEEY $key->$key3\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5332 # print "our contigcluster = @{$contigclusters{$key}}\n----------\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5333
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5334 if (scalar(@{$contigclusters{$key}}) > 0) {push @pool, @{$contigclusters{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5335 # print "now pool = @pool\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5336 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5337 delete $foundkeys{$key3};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5338 $key=$key3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5339 $key3=$clusteringpaths{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5340 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5341 # print "\nfinally, adding the first element of path: @{$contigclusters{$key}}\n AND printing the contents:\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5342 my @firstcontig= @{$contigclusters{$key}};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5343 delete $foundkeys{$key2} if exists $foundkeys{$key2} ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5344 delete $foundkeys{$key} if exists $foundkeys{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5345
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5346 unshift @pool, pop @firstcontig;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5347 # print join("\t",@pool),"\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5348 print ORTH join ("\n",@firstcontig),"\n" if scalar(@firstcontig) > 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5349 print ORTH join ("\t",@pool),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5350 # join();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5351 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5352
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5353 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5354 #close (NORTH);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5355 # print "founkeys_entered =$founkeys_enteredcount, plain_transfered=$plain_transfered,existing_removed=$existing_removed,founkeys_count =$founkeys_count, nopath_count =$nopath_count, transfered = $transfered, complete_transfered = $complete_transfered, totalcount = $totalcount, pathed=$pathed_count\n" if $prinkter == 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5356 close (BO);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5357 close (ORTH);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5358 close (OUTP);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5359 return 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5360
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5361 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5362 sub stringPainter{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5363 my @string = split(/_C0C_/,$_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5364 # print $_[0], " <- in stringPainter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5365 # print $_[1], " <- in clusters\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5366
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5367 my @clusters = split(/,/, $_[1]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5368 for my $i (0 ... $#clusters){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5369 my $cluster = $clusters[$i];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5370 # print "cluster = $cluster\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5371 my @parts = split(/\./,$cluster);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5372 my @cord = split(/:|-/,shift(@parts));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5373 my $minstart = $cord[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5374 my $maxend = $cord[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5375 # print "minstart = $minstart , maxend = $maxend\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5376
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5377 for my $j (0 ... $#parts){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5378 # print "oing thri $parts[$j]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5379 my @cord = split(/:|-/,$parts[$j]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5380 $minstart = $cord[1] if $cord[1] < $minstart;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5381 $maxend = $cord[2] if $cord[2] > $maxend;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5382 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5383 # print "minstart = $minstart , maxend = $maxend\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5384 for my $pos ($minstart ... $maxend){ $string[$pos] = $string[$pos].",".$cluster;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5385
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5386
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5387 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5388 # print "@string <-done from function stringPainter\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5389 return join("_S0S_",@string);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5390 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5391
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5392 sub findClusters{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5393 my $continue = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5394 my @mapped_clusters = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5395 my $clusterdist = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5396 my $previous = 'x';
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5397 my @localcluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5398 my $cluster_starts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5399 my $cluster_ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5400 my $localcluster_start = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5401 my $localcluster_end = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5402 my @record_cluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5403 my @string = split(/\!/, $_[0]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5404 my $zerolength=0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5405
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5406 for my $pos_pos (1 ... $#string){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5407 my $pos = $string[$pos_pos];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5408 # print $pos, "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5409 if ($continue == 0 && $pos eq "x") {next;}
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5410
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5411 if ($continue == 1 && $pos eq "x" && $zerolength <= $clusterdist){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5412 if ($zerolength == 0) {$localcluster_end = $pos_pos-1};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5413 $zerolength++;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5414 $continue = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5415 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5416
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5417 if ($continue == 1 && $pos eq "x" && $zerolength > $clusterdist) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5418 $zerolength = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5419 $continue = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5420 my %seen;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5421 my @uniqed = grep !$seen{$_}++, @localcluster;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5422 # print "caught cluster : @uniqed \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5423 push(@mapped_clusters, [@uniqed]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5424 # print "clustered:\n@uniqed\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5425 @localcluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5426 @record_cluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5427
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5428 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5429
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5430 if ($pos ne "x"){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5431 $zerolength = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5432 $continue = 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5433 $pos =~ s/x,//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5434 my @entries = split(/,/,$pos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5435 $localcluster_end = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5436 $localcluster_start = 0;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5437 push(@record_cluster,$pos);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5438
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5439 if ($continue == 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5440 @localcluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5441 @localcluster = (@localcluster, @entries);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5442 $localcluster_start = $pos_pos;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5443 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5444
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5445 if ($continue == 1 ) {
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5446 @localcluster = (@localcluster, @entries);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5447 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5448 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5449 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5450
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5451 if (scalar(@localcluster) > 0){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5452 my %seen;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5453 my @uniqed = grep !$seen{$_}++, @localcluster;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5454 # print "caught cluster : @uniqed \n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5455 push(@mapped_clusters, [@uniqed]);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5456 # print "clustered:\n@uniqed\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5457 @localcluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5458 @record_cluster = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5459 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5460
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5461 my @returner = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5462
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5463 foreach my $clust (@mapped_clusters){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5464 my @localclust = @$clust;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5465 my @result = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5466 foreach my $clustparts (@localclust){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5467 push(@result,$clustparts);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5468 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5469 push(@returner , join(".",@result));
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5470 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5471 # print "returnig: ", join(",",@returner), "\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5472 return join(",",@returner);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5473 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5474 #xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5475
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5476 #xxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5477
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5478 sub MakeTrees{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5479 my $tree = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5480 my @parts=($tree);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5481 # my @parts=();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5482
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5483 while (1){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5484 $tree =~ s/^\(//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5485 $tree =~ s/\)$//g;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5486 my @arr = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5487
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5488 if ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)\)$/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5489 @arr = $tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)$/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5490 $tree = $2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5491 push @parts, $tree;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5492 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5493 elsif ($tree =~ /^\(([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5494 @arr = $tree =~ /^([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5495 $tree = $1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5496 push @parts, $tree;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5497 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5498 elsif ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_]+)$/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5499 last;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5500 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5501 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5502 return @parts;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5503 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5504
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5505 #xxxxxxxxxxxxxx qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5506
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5507 sub qualityFilter{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5508 my $unmaskedorthfile = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5509 my $seqfile = $_[1];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5510 my $maskedorthfile = $_[2];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5511 my $filteredout = $maskedorthfile."_residue";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5512 open (PMORTH, "<$unmaskedorthfile") or die "Cannot open unmaskedorthfile file: $unmaskedorthfile: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5513
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5514 my %keyhash = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5515
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5516 while (my $line = <PMORTH>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5517 my $key = join("\t", $1,$2,$3,$4) if $line =~ /($focalspec)\s+([a-zA-Z0-9\-_]+)\s+([0-9]+)\s+([0-9]+)/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5518 push @{$keyhash{$key}}, $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5519 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5520
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5521 open (SEQ, "<$seqfile") or die "Cannot open seqfile file: $seqfile: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5522 open (MORTH, ">$maskedorthfile") or die "Cannot open maskedorthfile file: $maskedorthfile: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5523 open (RES, ">$filteredout") or die "Cannot open filteredout file: $filteredout: $!";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5524
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5525
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5526
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5527 while (my $line = <SEQ>){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5528 chomp $line;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5529 if ($line =~ /($focalspec)\s+([a-zA-Z0-9\-_]+)\s+([0-9]+)\s+([0-9]+)/){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5530 my $key = join("\t", $1,$2,$3,$4);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5531 next if !exists $keyhash{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5532 my @orths = @{$keyhash{$key}} if exists $keyhash{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5533 delete $keyhash{$key};
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5534
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5535 my $sine = <SEQ>;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5536
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5537 foreach my $orth (@orths){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5538 #print "-----------------------------------------------------------------\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5539 #print $orth;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5540 my $orthcopy = $orth;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5541 $orth =~ s/^>//;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5542 my @parts = split(/>/,$orth);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5543
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5544 my @starts = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5545 my @ends = ();
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5546
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5547 foreach my $part (@parts){
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5548 my $no_of_species = adjustCoordinates($part);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5549 my @pields = split(/\t/,$part);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5550
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5551 # print "pields = @pields .. no_of_species = $no_of_species .. startcord = $pields[$startcord]\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5552
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5553 push @starts, $pields[$startcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5554 push @ends, $pields[$endcord];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5555 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5556
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5557 #print "starts = @starts ... ends = @ends\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5558
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5559 my $leftend = smallest_number(@starts)-10;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5560 my $rightend = largest_number(@ends)+10;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5561
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5562 my $maskarea = substr($sine, $leftend, $rightend-$leftend+1);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5563 print RES $orth if $maskarea =~ /#/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5564
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5565
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5566 next if $maskarea =~ /#/;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5567
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5568 print MORTH $orthcopy;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5569 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5570 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5571 else{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5572 next;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5573 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5574
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5575
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5576 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5577
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5578 # print "UNDONE: ", scalar(keys %keyhash),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5579 # print MORTH "UNDONE: ", scalar(keys %keyhash),"\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5580
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5581 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5582
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5583 sub adjustCoordinates{
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5584 my $line = $_[0];
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5585 my $no_of_species = $line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)|(scaffold[0-9a-zA-Z\._\-]+)|(supercontig[0-9a-zA-Z\._\-]+)/x/ig;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5586 my @got = ($line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)/x/g);
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5587 # print "line = $line\n";
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5588 $infocord = 2 + (4*$no_of_species) - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5589 $typecord = 2 + (4*$no_of_species) + 1 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5590 $motifcord = 2 + (4*$no_of_species) + 2 - 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5591 $gapcord = $motifcord+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5592 $startcord = $gapcord+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5593 $strandcord = $startcord+1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5594 $endcord = $strandcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5595 $microsatcord = $endcord + 1;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5596 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5597 $interr_poscord = $microsatcord + 3;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5598 $no_of_interruptionscord = $microsatcord + 4;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5599 $interrcord = $microsatcord + 2;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5600 # print "$line\n startcord = $startcord, and endcord = $endcord and no_of_species = $no_of_species\n" if $line !~ /calJac/i;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5601 return $no_of_species;
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5602 }
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5603
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5604
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5605
275433d3a395 Uploaded tool tarball.
devteam
parents:
diff changeset
5606