Mercurial > repos > jjohnson > crest
comparison SCValidator.pm @ 0:acc8d8bfeb9a
Uploaded
author | jjohnson |
---|---|
date | Wed, 08 Feb 2012 16:59:24 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc8d8bfeb9a |
---|---|
1 package SCValidator; | |
2 # this is a singleton class | |
3 | |
4 use strict; | |
5 use Exporter 'import'; | |
6 our @EXPORT_OK = qw($lowqual_cutoff $min_percent_id $min_percent_hq LEFT_CLIP RIGHT_CLIP); | |
7 use Bio::DB::Sam; | |
8 use constant LEFT_CLIP => 0; | |
9 use constant RIGHT_CLIP => 1; | |
10 | |
11 our $min_percent_hq = 80; | |
12 our $lowqual_cutoff = 20; | |
13 our $min_percent_id = 90; | |
14 | |
15 my $SCValidator; | |
16 my %default_validators = ( | |
17 "quality_validator" => \&_quality_validator, | |
18 "align_validator" => \&_align_validator, | |
19 "strand_validator" => \&_strand_validator, | |
20 ); | |
21 | |
22 sub new { | |
23 if( $SCValidator) { | |
24 return $SCValidator; | |
25 } else { | |
26 my $type = shift; | |
27 my $this = { | |
28 "validator" => {} | |
29 }; | |
30 foreach my $v ( keys(%default_validators)) { | |
31 $this->{validator}{$v} = $default_validators{$v}; | |
32 } | |
33 $SCValidator = bless $this, $type; | |
34 } | |
35 } | |
36 | |
37 sub add_validator { | |
38 my $self = shift; | |
39 my $name = shift; | |
40 $name = lc $name; | |
41 if( exists($self->{validator}{$name})) { | |
42 # print STDERR "Validator: $name is already in the list"; | |
43 return; | |
44 } | |
45 if( exists($default_validators{$name})) { | |
46 $self->{validator}{$name} = $default_validators{$name}; | |
47 return; | |
48 } | |
49 my $validator = shift; | |
50 die "Validator must be coderef" if(ref($validator) ne "CODE"); | |
51 $self->{validator}{$name} = $validator; | |
52 } | |
53 | |
54 sub remove_validator { | |
55 my ($self, $name) = @_; | |
56 $name = lc $name; | |
57 if(exists($self->{validator}{$name})) { | |
58 delete $self->{validator}{$name}; | |
59 } | |
60 } | |
61 | |
62 sub validate { | |
63 my $self = shift; | |
64 my $a = shift; | |
65 my $clip = shift; | |
66 die "The method will only validate bam alignment object" | |
67 if( ref($a) ne "Bio::DB::Bam::Aligment" && | |
68 ref($a) ne "Bio::DB::Bam::AlignWrapper" ); | |
69 return undef if($a->unmapped); | |
70 foreach my $v (keys(%{$self->{"validator"}})) { | |
71 return undef if(!$self->{"validator"}{$v}->($a, $clip)); | |
72 } | |
73 return 1; | |
74 } | |
75 | |
76 sub _align_validator { | |
77 #we only consider high quality part of the alignment | |
78 my $a = shift; | |
79 my ($ref, $matches, $query) = $a->padded_alignment; | |
80 my @qscores = $a->qscore; | |
81 my $n_matches = 0; | |
82 my $align_len = 0; | |
83 my $j = 0; | |
84 my ($start, $end) = (0, length($ref)-1); | |
85 my @cigar_array = @{ $a->cigar_array }; | |
86 $start = $cigar_array[0]->[1] if($cigar_array[0]->[0] eq 'S'); | |
87 $end = $end - $cigar_array[$#cigar_array]->[1] if($cigar_array[$#cigar_array]->[0] eq 'S'); | |
88 | |
89 for( my $i = $start; $i <= $end; $i++) { | |
90 my $m = substr($matches, $i, 1); | |
91 if($m ne '|') { | |
92 if( substr($query, $i, 1) ne '-') { | |
93 # $align_len++ if($qscores[$j] >= $lowqual_cutoff); | |
94 $j++; | |
95 } | |
96 # else { | |
97 # $align_len++ if($qscores[$j] >= $lowqual_cutoff && $qscores[$j+1] >= $lowqual_cutoff); | |
98 # } | |
99 next; | |
100 } | |
101 if($qscores[$j] >= $lowqual_cutoff) { | |
102 $n_matches++ if(substr($ref, $i, 1) eq substr($query, $i, 1)); | |
103 $align_len++; | |
104 } | |
105 $j++; | |
106 } | |
107 return ($align_len - $n_matches > 1) ? undef : 1 if($align_len < 20); | |
108 return ($n_matches * 100 >= $align_len * $min_percent_id) ? 1 : undef; | |
109 } | |
110 | |
111 sub _quality_validator { | |
112 my $a = shift; | |
113 my $clip = shift; | |
114 # my $cigar_str = $a->cigar_str; | |
115 my @cigar_array = @{$a->cigar_array}; | |
116 my @qual = $a->qscore; | |
117 if($clip == LEFT_CLIP) { | |
118 #my($sclip_len) = $cigar_str =~ m/^S(\d+)/; | |
119 my $sclip_len = $cigar_array[0]->[1]; | |
120 @qual = @qual[0 .. ($sclip_len - 1)]; | |
121 } | |
122 elsif ($clip == RIGHT_CLIP) { | |
123 #my($sclip_len) = $cigar_str =~ m/S(\d+)$/; | |
124 my $sclip_len = $cigar_array[$#cigar_array]->[1]; | |
125 @qual = @qual[($a->l_qseq - $sclip_len) .. ($a->l_qseq - 1)]; | |
126 } | |
127 else { | |
128 print STDERR "Please specify the soft_clipping position: | |
129 0: LEFT_CLIP or 1: RIGHT_CLIP"; | |
130 } | |
131 | |
132 my $n_hq = 0; | |
133 for( my $i = 0; $i <= $#qual; $i++) { | |
134 $n_hq++ if($qual[$i] >= $lowqual_cutoff); | |
135 } | |
136 return ($n_hq * 100 > $#qual * $min_percent_hq) ? 1 : undef; | |
137 } | |
138 | |
139 sub _strand_validator { | |
140 my $a = shift; | |
141 my $clip = shift; | |
142 if($clip == LEFT_CLIP ) { | |
143 return ($a->paired && ($a->mtid != $a->tid || $a->mate_start < $a->start)) ? undef : 1; | |
144 } | |
145 elsif($clip == RIGHT_CLIP) { | |
146 return ($a->paired && ($a->mtid != $a->tid || $a->mate_start > $a->end)) ? undef : 1; | |
147 } | |
148 else { | |
149 print STDERR "Please specify the soft_clipping position: | |
150 0: LEFT_CLIP or 1: RIGHT_CLIP"; | |
151 } | |
152 } | |
153 | |
154 1; |