Mercurial > repos > jjohnson > crest
diff SCValidator.pm @ 0:acc8d8bfeb9a
Uploaded
author | jjohnson |
---|---|
date | Wed, 08 Feb 2012 16:59:24 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SCValidator.pm Wed Feb 08 16:59:24 2012 -0500 @@ -0,0 +1,154 @@ +package SCValidator; +# this is a singleton class + +use strict; +use Exporter 'import'; +our @EXPORT_OK = qw($lowqual_cutoff $min_percent_id $min_percent_hq LEFT_CLIP RIGHT_CLIP); +use Bio::DB::Sam; +use constant LEFT_CLIP => 0; +use constant RIGHT_CLIP => 1; + +our $min_percent_hq = 80; +our $lowqual_cutoff = 20; +our $min_percent_id = 90; + +my $SCValidator; +my %default_validators = ( + "quality_validator" => \&_quality_validator, + "align_validator" => \&_align_validator, + "strand_validator" => \&_strand_validator, +); + +sub new { + if( $SCValidator) { + return $SCValidator; + } else { + my $type = shift; + my $this = { + "validator" => {} + }; + foreach my $v ( keys(%default_validators)) { + $this->{validator}{$v} = $default_validators{$v}; + } + $SCValidator = bless $this, $type; + } +} + +sub add_validator { + my $self = shift; + my $name = shift; + $name = lc $name; + if( exists($self->{validator}{$name})) { +# print STDERR "Validator: $name is already in the list"; + return; + } + if( exists($default_validators{$name})) { + $self->{validator}{$name} = $default_validators{$name}; + return; + } + my $validator = shift; + die "Validator must be coderef" if(ref($validator) ne "CODE"); + $self->{validator}{$name} = $validator; +} + +sub remove_validator { + my ($self, $name) = @_; + $name = lc $name; + if(exists($self->{validator}{$name})) { + delete $self->{validator}{$name}; + } +} + +sub validate { + my $self = shift; + my $a = shift; + my $clip = shift; + die "The method will only validate bam alignment object" + if( ref($a) ne "Bio::DB::Bam::Aligment" && + ref($a) ne "Bio::DB::Bam::AlignWrapper" ); + return undef if($a->unmapped); + foreach my $v (keys(%{$self->{"validator"}})) { + return undef if(!$self->{"validator"}{$v}->($a, $clip)); + } + return 1; +} + +sub _align_validator { + #we only consider high quality part of the alignment + my $a = shift; + my ($ref, $matches, $query) = $a->padded_alignment; + my @qscores = $a->qscore; + my $n_matches = 0; + my $align_len = 0; + my $j = 0; + my ($start, $end) = (0, length($ref)-1); + my @cigar_array = @{ $a->cigar_array }; + $start = $cigar_array[0]->[1] if($cigar_array[0]->[0] eq 'S'); + $end = $end - $cigar_array[$#cigar_array]->[1] if($cigar_array[$#cigar_array]->[0] eq 'S'); + + for( my $i = $start; $i <= $end; $i++) { + my $m = substr($matches, $i, 1); + if($m ne '|') { + if( substr($query, $i, 1) ne '-') { + # $align_len++ if($qscores[$j] >= $lowqual_cutoff); + $j++; + } + # else { + # $align_len++ if($qscores[$j] >= $lowqual_cutoff && $qscores[$j+1] >= $lowqual_cutoff); + # } + next; + } + if($qscores[$j] >= $lowqual_cutoff) { + $n_matches++ if(substr($ref, $i, 1) eq substr($query, $i, 1)); + $align_len++; + } + $j++; + } + return ($align_len - $n_matches > 1) ? undef : 1 if($align_len < 20); + return ($n_matches * 100 >= $align_len * $min_percent_id) ? 1 : undef; +} + +sub _quality_validator { + my $a = shift; + my $clip = shift; +# my $cigar_str = $a->cigar_str; + my @cigar_array = @{$a->cigar_array}; + my @qual = $a->qscore; + if($clip == LEFT_CLIP) { + #my($sclip_len) = $cigar_str =~ m/^S(\d+)/; + my $sclip_len = $cigar_array[0]->[1]; + @qual = @qual[0 .. ($sclip_len - 1)]; + } + elsif ($clip == RIGHT_CLIP) { + #my($sclip_len) = $cigar_str =~ m/S(\d+)$/; + my $sclip_len = $cigar_array[$#cigar_array]->[1]; + @qual = @qual[($a->l_qseq - $sclip_len) .. ($a->l_qseq - 1)]; + } + else { + print STDERR "Please specify the soft_clipping position: + 0: LEFT_CLIP or 1: RIGHT_CLIP"; + } + + my $n_hq = 0; + for( my $i = 0; $i <= $#qual; $i++) { + $n_hq++ if($qual[$i] >= $lowqual_cutoff); + } + return ($n_hq * 100 > $#qual * $min_percent_hq) ? 1 : undef; +} + +sub _strand_validator { + my $a = shift; + my $clip = shift; + if($clip == LEFT_CLIP ) { + return ($a->paired && ($a->mtid != $a->tid || $a->mate_start < $a->start)) ? undef : 1; + } + elsif($clip == RIGHT_CLIP) { + return ($a->paired && ($a->mtid != $a->tid || $a->mate_start > $a->end)) ? undef : 1; + } + else { + print STDERR "Please specify the soft_clipping position: + 0: LEFT_CLIP or 1: RIGHT_CLIP"; + } +} + +1;