Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
diff 2.4/library/LevD.pm @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
line wrap: on
line diff
package LevD;

#### LevD: approximate matching of a clipped read sequence against a
#### region of a reference FASTA file.
####
#### The region is extracted with "samtools faidx" and compared against
#### the clip with String::Approx. Results are stored on the object:
####   $self->{index}               start index of the best match (aindex)
####   $self->{edit_dist}           edit distance (adist)
####   $self->{relative_edit_dist}  relative edit distance (adistr)
####
#### Usage:
####   my $lev = LevD->new();
####   $lev->search($clip, $chr, $start, $stop, $ref_fasta);
####   print $lev->{edit_dist}, "\n";

use lib "/data2/bsi/reference/softsearch/lib/perl5";
use strict;
use warnings;
use Data::Dumper;
use String::Approx 'adist';
use String::Approx 'adistr';
use String::Approx 'aindex';

#### Not referenced anywhere in this file; kept as-is.
my $WINDOW_SIZE = 100;

#### Constructor.
#### Any arguments after the class name are accepted for backward
#### compatibility but are not used.
#### Returns a new LevD object with all result fields set to 0.
sub new {
    my ($class) = @_;
    my $self = {};

    bless($self, $class);
    $self->init();

    return $self;
}

#### Reset all result fields to their default value (0).
sub init {
    my ($self) = @_;

    #### default values.
    $self->{index}              = 0;
    $self->{relative_edit_dist} = 0;
    $self->{edit_dist}          = 0;
}

#### Search for $clip inside the reference region $chr:$start-$stop.
####
#### Parameters:
####   $clip   sequence to look for
####   $chr    reference sequence (contig) name
####   $start  region start, passed verbatim to samtools faidx
####   $stop   region end, passed verbatim to samtools faidx
####   $ref    path to the reference FASTA file
####
#### Stores the results in $self->{relative_edit_dist},
#### $self->{edit_dist} and $self->{index}, and returns the index.
####
#### Dies if $ref is undefined, missing or empty, or if samtools fails.
####
#### NOTE(review): the comparison is done on the sequences exactly as
#### given; if the reference is soft-masked (lower case) the caller may
#### need to normalise case first -- confirm against callers.
sub search {
    my ($self, $clip, $chr, $start, $stop, $ref) = @_;

    #### -s is false for a missing file as well as for an empty one.
    if (!defined $ref || !-s $ref) {
        my $shown = defined $ref ? $ref : '';
        die "ERROR: Reference file $shown not found\n";
    }

    #### extract seq from reference file.
    #### Both arguments are quoted: paths may contain spaces and contig
    #### names may contain shell metacharacters such as '*'.
    my $target = $chr . ":" . $start . "-" . $stop;
    my $cmd    = join(" ",
        "samtools", "faidx", _shell_quote($ref), _shell_quote($target));

    my @output = $self->_run_system_cmd($cmd);

    #### depending on ref file format seq could be on multiple lines
    #### concatenate all except for the header in one line.
    #### e.g:
    #### >chr1:8222999-8223099
    #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
    #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
    my $seq = $self->_extract_sequence(@output);

    #### find number of mismatches and start index
    #### of clip to be searched against target seq.
    $self->{relative_edit_dist} = adistr($clip, $seq);
    $self->{edit_dist}          = adist($clip, $seq);
    $self->{index}              = aindex($clip, $seq);

    return $self->{index};
}

#### Build a single sequence string from "samtools faidx" output lines.
####
#### _run_system_cmd merges stderr into stdout, so diagnostic lines can
#### appear anywhere in the output. Everything up to and including the
#### '>' header line is skipped, and after it only lines made up purely
#### of sequence characters are kept; line endings (\n and \r) are
#### removed.
####
#### Returns the concatenated sequence, or "" if none was found.
sub _extract_sequence {
    my ($self, @lines) = @_;

    my $seen_header = 0;
    my $seq         = "";

    for my $line (@lines) {
        if ($line =~ /\A>/) {
            $seen_header = 1;
            next;
        }
        next unless $seen_header;

        (my $bases = $line) =~ tr/\r\n//d;

        #### skip blank lines and anything that is not sequence text.
        next unless $bases =~ /\A[A-Za-z*.-]+\z/;

        $seq .= $bases;
    }

    return $seq;
}

#### Quote a string so the shell treats it as one literal argument.
#### Plain function, not a method.
sub _shell_quote {
    my ($arg) = @_;

    #### close the quote, emit an escaped quote, reopen the quote.
    $arg =~ s/'/'\\''/g;

    return "'" . $arg . "'";
}

#### Run $cmd through the shell with stderr merged into stdout.
####
#### Returns the output as a list of lines (line endings included).
#### Dies with "Error executing command <cmd>: <output>" when the wait
#### status $? is non-zero, i.e. on a non-zero exit code, death by
#### signal, or failure to start the command.
sub _run_system_cmd {
    my ($self, $cmd) = @_;

    my @cmd_output = qx{$cmd 2>&1};

    #### $? holds the full wait status; any non-zero value is a failure.
    if ($? != 0) {
        die "Error executing command $cmd: @cmd_output";
    }

    return @cmd_output;
}

1;