Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
view 2.4/lib/LevD.pm @ 18:1163c16cb3c0 draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Mon, 02 Jun 2014 07:35:53 -0400 |
parents | 8eb7d93f7e58 |
children |
line wrap: on
line source
package LevD; use lib "/home/plus91/shed_tools/toolshed.g2.bx.psu.edu/repos/plus91-technologies-pvt-ltd/softsearch/e3609c8714fb/softsearch/2.4/lib/perl5"; use strict; use warnings; use Data::Dumper; use String::Approx 'adist'; use String::Approx 'adistr'; use String::Approx 'aindex'; my $WINDOW_SIZE = 100; sub new { my ($class, $file) = @_; my $self = {}; bless($self,$class); $self->init(); return $self; } sub init { my ($self) = @_; #### default values. $self->{index} = 0; $self->{relative_edit_dist} = 0; $self->{edit_dist} = 0; } sub search { my ($self, $clip, $chr, $start, $stop, $ref) = @_; if (! -s $ref) { die "ERROR: Reference file $ref now found\n"; } #### extact seq from reference file. my $target = $chr .":". $start ."-". $stop; my $cmd = "samtools faidx $ref $target"; my @output = $self->_run_system_cmd($cmd); #### depending on ref file format seq could be on multiple lines #### concatinate all except for the header in one line. #### e.g: #### >chr1:8222999-8223099 #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT my $seq = join("", @output[1..$#output]); #### remove new line character $seq =~ s/\n//g; #### find number of mismatches and start index #### of clip to be searched against target seq. $self->{relative_edit_dist} = adistr($clip, $seq); $self->{edit_dist} = adist($clip, $seq); $self->{index} = aindex($clip, $seq); } sub _run_system_cmd { my ($self, $cmd) = @_; my @cmd_output; eval { @cmd_output = qx{$cmd 2>&1}; if ( ($? << 8) != 0 ) { die "@cmd_output"; } }; if ($@) { die "Error executing command $cmd: $@"; } return @cmd_output; } 1;