comparison 2.4/library/LevD.pm @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents e3609c8714fb
children
comparison
equal deleted inserted replaced
17:5343ef57827f 18:1163c16cb3c0
1 package LevD;
2
3 use lib "/data2/bsi/reference/softsearch/lib/perl5";
4 use strict;
5 use warnings;
6 use Data::Dumper;
7 use String::Approx 'adist';
8 use String::Approx 'adistr';
9 use String::Approx 'aindex';
10
11 my $WINDOW_SIZE = 100;
12
13 sub new {
14 my ($class, $file) = @_;
15 my $self = {};
16
17 bless($self,$class);
18 $self->init();
19
20 return $self;
21 }
22
23 sub init {
24 my ($self) = @_;
25
26 #### default values.
27 $self->{index} = 0;
28 $self->{relative_edit_dist} = 0;
29 $self->{edit_dist} = 0;
30 }
31
32 sub search {
33 my ($self, $clip, $chr, $start, $stop, $ref) = @_;
34
35 if (! -s $ref) {
36 die "ERROR: Reference file $ref now found\n";
37 }
38
39 #### extact seq from reference file.
40 my $target = $chr .":". $start ."-". $stop;
41 my $cmd = "samtools faidx $ref $target";
42
43 my @output = $self->_run_system_cmd($cmd);
44
45 #### depending on ref file format seq could be on multiple lines
46 #### concatinate all except for the header in one line.
47 #### e.g:
48 #### >chr1:8222999-8223099
49 #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
50 #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
51 my $seq = join("", @output[1..$#output]);
52
53 #### remove new line character
54 $seq =~ s/\n//g;
55
56 #### find number of mismatches and start index
57 #### of clip to be searched against target seq.
58 $self->{relative_edit_dist} = adistr($clip, $seq);
59 $self->{edit_dist} = adist($clip, $seq);
60 $self->{index} = aindex($clip, $seq);
61 }
62
63 sub _run_system_cmd {
64 my ($self, $cmd) = @_;
65 my @cmd_output;
66
67 eval {
68 @cmd_output = qx{$cmd 2>&1};
69 if ( ($? << 8) != 0 ) {
70 die "@cmd_output";
71 }
72 };
73 if ($@) {
74 die "Error executing command $cmd: $@";
75 }
76
77 return @cmd_output;
78 }
79
80 1;