Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
view 2.4/lib/LevD.pm @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
line wrap: on
line source
package LevD; use lib "/home/plus91/2.4/lib/perl5"; use strict; use warnings; use Data::Dumper; use String::Approx 'adist'; use String::Approx 'adistr'; use String::Approx 'aindex'; my $WINDOW_SIZE = 100; sub new { my ($class, $file) = @_; my $self = {}; bless($self,$class); $self->init(); return $self; } sub init { my ($self) = @_; #### default values. $self->{index} = 0; $self->{relative_edit_dist} = 0; $self->{edit_dist} = 0; } sub search { my ($self, $clip, $chr, $start, $stop, $ref) = @_; if (! -s $ref) { die "ERROR: Reference file $ref now found\n"; } #### extact seq from reference file. my $target = $chr .":". $start ."-". $stop; my $cmd = "samtools faidx $ref $target"; my @output = $self->_run_system_cmd($cmd); #### depending on ref file format seq could be on multiple lines #### concatinate all except for the header in one line. #### e.g: #### >chr1:8222999-8223099 #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT my $seq = join("", @output[1..$#output]); #### remove new line character $seq =~ s/\n//g; #### find number of mismatches and start index #### of clip to be searched against target seq. $self->{relative_edit_dist} = adistr($clip, $seq); $self->{edit_dist} = adist($clip, $seq); $self->{index} = aindex($clip, $seq); } sub _run_system_cmd { my ($self, $cmd) = @_; my @cmd_output; eval { @cmd_output = qx{$cmd 2>&1}; if ( ($? << 8) != 0 ) { die "@cmd_output"; } }; if ($@) { die "Error executing command $cmd: $@"; } return @cmd_output; } 1;