Mercurial > repos > cpt > cpt_psm_recombine
diff lib/CPT/Bio/NW_MSA.pm @ 1:97ef96676b48 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:26 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/CPT/Bio/NW_MSA.pm Mon Jun 05 02:51:26 2023 +0000 @@ -0,0 +1,228 @@ +package CPT::Bio::NW_MSA; +use Moose; +use strict; +use warnings; +use autodie; +use List::Util qw(max); + +has 'sequences' => ( is => 'rw', isa => 'ArrayRef'); +# If true, relationships are assumed to be bidirectional +has 'bidi' => (is => 'rw', isa => 'Bool'); +has 'relationships' => ( is => 'rw', isa => 'HashRef', default => sub { {} }); +# +has 'verbose' => ( is => 'rw', isa => 'Num'); +# +has 'gap_penalty' => ( is => 'rw', isa => 'Num'); +has 'match_score' => ( is => 'rw', isa => 'Num'); +has 'mismatch_score' => ( is => 'rw', isa => 'Num'); + +# local stuff +#has 'current_list' => ( is => 'rw', isa => 'ArrayRef'); +has 'merger' => (is => 'rw', isa => 'HashRef'); +has 'number_of_aligned_lists' => ( is => 'rw', isa => 'Num', default => sub { 0 }); + +sub add_relationship { + my ($self, $from, $to) = @_; + ${$self->relationships()}{$from}{$to} = 1; + if($self->bidi()){ + ${$self->relationships()}{$to}{$from} = 1; + } +} + +sub Sij { + my($self, $merger_row, $query) = @_; + # Comparing a query against a merger row + my @check_against = @{$merger_row}; + #print "Checking " . join(",",@check_against) .":$query\t"; + foreach(@check_against){ + if(${$self->relationships()}{$query}{$_} + || ${$self->relationships()}{$_}{$query}){ + #print $self->match_score() . "\n"; + return $self->match_score(); + } + } + #print $self->mismatch_score() . "\n"; + return $self->mismatch_score(); +} + +sub align_list { + my ($self, $list_ref) = @_; + # If we haven't aligned any lists, we do something special + if($self->number_of_aligned_lists() == 0){ + # Pretend we've aligned ONE list already + $self->number_of_aligned_lists(1); + my @list = @{$list_ref}; + # Fake the merger + my %merger; + for(my $i = 0; $i < scalar @list; $i++){ + $merger{$i} = [$list[$i]]; + } + $self->merger(\%merger); + }else{ + $self->find_best_path($self->merger(), $list_ref); + $self->number_of_aligned_lists($self->number_of_aligned_lists() + 1); + } +} + +sub find_best_path { + my ($self, $merger_ref, $list_ref) = @_; + my %merger = %{$merger_ref}; + my @list = @{$list_ref}; + + my $max_i = scalar(keys(%merger)); + my $max_j = scalar(@list); + + my %score_mat; + my %point_mat; + + # Initial zeros for matrices + $point_mat{0}{0} = 'DONE'; + $score_mat{0}{0} = 0; + + for(my $a = 1; $a <= $max_i; $a++){ + $point_mat{$a}{0} = 'U'; + $score_mat{$a}{0} = $self->gap_penalty(); + } + for(my $b = 1; $b <= $max_j; $b++){ + $point_mat{0}{$b} = 'L'; + $score_mat{0}{$b} = $self->gap_penalty(); + } + + # Score + for(my $i = 1 ; $i <= $max_i; $i++){ + my $ci = $merger{$i-1}; + for(my $j = 1; $j <= $max_j; $j++){ + my $cj = $list[$j-1]; + # Scoring + my $diag_score = $score_mat{$i-1}{$j-1} + $self->Sij($ci,$cj); + my $up_score = $score_mat{$i-1}{$j} + $self->gap_penalty(); + my $left_score = $score_mat{$i}{$j-1} + $self->gap_penalty(); + + if($diag_score >= $up_score){ + if($diag_score >= $left_score){ + $score_mat{$i}{$j} = $diag_score; + $point_mat{$i}{$j} = 'D'; + }else{ + $score_mat{$i}{$j} = $left_score; + $point_mat{$i}{$j} = 'L'; + } + }else{ + if($up_score >= $left_score){ + $score_mat{$i}{$j} = $up_score; + $point_mat{$i}{$j} = 'U'; + }else{ + $score_mat{$i}{$j} = $left_score; + $point_mat{$i}{$j} = 'L'; + } + } + } + } + + $self->print2DArray('score_mat', \%score_mat); + $self->print2DArray('point_mat', \%point_mat); + + + # Calculate merger + my @new_row_set; + my $i = $max_i + 0; + my $j = $max_j + 0; + while($i != 0 || $j != 0){ + my $dir = $point_mat{$i}{$j}; + my @new_row; + if($dir eq 'D'){ + push(@new_row, @{$merger{$i-1}}, $list[$j-1]); + $i--; + $j--; + }elsif($dir eq 'L'){ + push(@new_row, split(//, '-' x ($self->number_of_aligned_lists()))); + push(@new_row, $list[$j-1]); + $j--; + }elsif($dir eq 'U'){ + push(@new_row, @{$merger{$i-1}}, '-'); + $i--; + } + if($self->verbose()){ + print join("\t", $i, $j, $dir, @new_row),"\n"; + } + push(@new_row_set,\@new_row); + } + + my %new_merger; + for(my $i = 0; $i < scalar(@new_row_set); $i++){ + $new_merger{$i} = $new_row_set[scalar(@new_row_set) - $i - 1]; + } + $self->merger(\%new_merger); +} + +sub merged_array{ + my ($self) = @_; + my %m = %{$self->merger()}; + my @result; + foreach(sort{$a<=>$b} keys(%m)){ + push(@result, $m{$_}); + } + return @result; +} + + +sub print2DArray{ + my($self,$name, $ref) = @_; + if($self->verbose && defined $ref){ + print '*' x 32,"\n"; + print $name,"\n"; + if(ref $ref eq 'ARRAY'){ + foreach(@{$ref}){ + print join("\t",@{$_}),"\n"; + } + }elsif(ref $ref eq 'HASH'){ + my %h = %{$ref}; + foreach my $a(sort(keys(%h))){ + if(ref $h{$a} eq 'ARRAY'){ + print join("", map { sprintf "%-4s",$_ } @{$h{$a}}); + }else{ + foreach my $b(sort(keys($h{$a}))){ + if(defined($h{$a}{$b})){ + printf "%5s", $h{$a}{$b}; + } + } + } + print "\n"; + } + }else{ + die 'Unsupported'; + } + print '*' x 32,"\n"; + } +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::NW_MSA + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche <rasche.eric@yandex.ru> + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut