13
|
1 package LevD;
|
|
2
|
|
3 use lib "/data2/bsi/reference/softsearch/lib/perl5";
|
|
4 use strict;
|
|
5 use warnings;
|
|
6 use Data::Dumper;
|
|
7 use String::Approx 'adist';
|
|
8 use String::Approx 'adistr';
|
|
9 use String::Approx 'aindex';
|
|
10
|
|
11 my $WINDOW_SIZE = 100;
|
|
12
|
|
13 sub new {
|
|
14 my ($class, $file) = @_;
|
|
15 my $self = {};
|
|
16
|
|
17 bless($self,$class);
|
|
18 $self->init();
|
|
19
|
|
20 return $self;
|
|
21 }
|
|
22
|
|
23 sub init {
|
|
24 my ($self) = @_;
|
|
25
|
|
26 #### default values.
|
|
27 $self->{index} = 0;
|
|
28 $self->{relative_edit_dist} = 0;
|
|
29 $self->{edit_dist} = 0;
|
|
30 }
|
|
31
|
|
32 sub search {
|
|
33 my ($self, $clip, $chr, $start, $stop, $ref) = @_;
|
|
34
|
|
35 if (! -s $ref) {
|
|
36 die "ERROR: Reference file $ref now found\n";
|
|
37 }
|
|
38
|
|
39 #### extact seq from reference file.
|
|
40 my $target = $chr .":". $start ."-". $stop;
|
|
41 my $cmd = "samtools faidx $ref $target";
|
|
42
|
|
43 my @output = $self->_run_system_cmd($cmd);
|
|
44
|
|
45 #### depending on ref file format seq could be on multiple lines
|
|
46 #### concatinate all except for the header in one line.
|
|
47 #### e.g:
|
|
48 #### >chr1:8222999-8223099
|
|
49 #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
|
|
50 #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
|
|
51 my $seq = join("", @output[1..$#output]);
|
|
52
|
|
53 #### remove new line character
|
|
54 $seq =~ s/\n//g;
|
|
55
|
|
56 #### find number of mismatches and start index
|
|
57 #### of clip to be searched against target seq.
|
|
58 $self->{relative_edit_dist} = adistr($clip, $seq);
|
|
59 $self->{edit_dist} = adist($clip, $seq);
|
|
60 $self->{index} = aindex($clip, $seq);
|
|
61 }
|
|
62
|
|
63 sub _run_system_cmd {
|
|
64 my ($self, $cmd) = @_;
|
|
65 my @cmd_output;
|
|
66
|
|
67 eval {
|
|
68 @cmd_output = qx{$cmd 2>&1};
|
|
69 if ( ($? << 8) != 0 ) {
|
|
70 die "@cmd_output";
|
|
71 }
|
|
72 };
|
|
73 if ($@) {
|
|
74 die "Error executing command $cmd: $@";
|
|
75 }
|
|
76
|
|
77 return @cmd_output;
|
|
78 }
|
|
79
|
|
80 1;
|