annotate 2.4/lib/LevD.pm @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents 8eb7d93f7e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 package LevD;
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use lib "/home/plus91/shed_tools/toolshed.g2.bx.psu.edu/repos/plus91-technologies-pvt-ltd/softsearch/e3609c8714fb/softsearch/2.4/lib/perl5";
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 use strict;
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 use warnings;
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 use Data::Dumper;
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7 use String::Approx 'adist';
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 use String::Approx 'adistr';
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9 use String::Approx 'aindex';
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### File-scoped lexical holding the value 100.
#### NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
11 my $WINDOW_SIZE = 100;
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Constructor: builds an empty hash-based object blessed into the
#### invoking class, lets init() populate its default fields and
#### returns it.
#### A second positional argument (historically named $file) is
#### accepted but ignored.
sub new {
    my $class = shift;

    my $object = bless {}, $class;
    $object->init();

    return $object;
}
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Resets the three result fields of the object (index,
#### relative_edit_dist and edit_dist) to zero. Returns 0.
sub init {
    my $self = shift;

    #### default values, assigned in one hash slice.
    my @result_fields = qw(index relative_edit_dist edit_dist);
    @{$self}{@result_fields} = (0) x scalar(@result_fields);

    return 0;
}
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Approximate-match $clip against a region of the reference.
####
#### Builds the region string "$chr:$start-$stop", extracts it from the
#### FASTA file $ref by running `samtools faidx` through
#### _run_system_cmd(), and compares $clip with the extracted sequence
#### using String::Approx. The results are stored on the object:
####   relative_edit_dist - result of adistr($clip, $seq)
####   edit_dist          - result of adist($clip, $seq)
####   index              - result of aindex($clip, $seq)
#### Returns the value stored in {index}.
#### Dies when $ref is undefined, missing or empty, and propagates any
#### error raised by _run_system_cmd().
sub search {
    my ($self, $clip, $chr, $start, $stop, $ref) = @_;

    #### -s is false for a missing file as well as for an empty one.
    if (!defined $ref || !-s $ref) {
        my $shown = defined $ref ? $ref : '(undefined)';
        die "ERROR: Reference file $shown not found or empty\n";
    }

    #### extract seq from reference file.
    my $target = $chr . ":" . $start . "-" . $stop;

    #### The command goes through the shell, so single-quote both
    #### arguments: a path with spaces or a contig name containing shell
    #### metacharacters (e.g. "|") must reach samtools unchanged.
    my @quoted;
    for my $arg ($ref, $target) {
        (my $escaped = $arg) =~ s/'/'\\''/g;
        push @quoted, "'$escaped'";
    }
    my $cmd = "samtools faidx $quoted[0] $quoted[1]";

    my @output = $self->_run_system_cmd($cmd);

    #### depending on ref file format seq could be on multiple lines
    #### concatenate all except for the header in one line.
    #### e.g:
    #### >chr1:8222999-8223099
    #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
    #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
    my $seq = join("", @output[1 .. $#output]);

    #### remove line terminators (LF, and CR from CRLF-formatted files)
    $seq =~ s/[\r\n]//g;

    #### find number of mismatches and start index
    #### of clip to be searched against target seq.
    $self->{relative_edit_dist} = adistr($clip, $seq);
    $self->{edit_dist}          = adist($clip, $seq);
    $self->{index}              = aindex($clip, $seq);

    return $self->{index};
}
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Runs $cmd through the shell and returns its output as a list of
#### lines. stderr is redirected into stdout, so diagnostics printed by
#### the command are part of the returned lines.
####
#### Dies with a message starting "Error executing command $cmd: " when
#### the command could not be started, was killed by a signal, or
#### exited with a non-zero status. The message states which of these
#### happened and includes whatever output was captured.
sub _run_system_cmd {
    my ($self, $cmd) = @_;

    my @cmd_output = qx{$cmd 2>&1};

    #### Save both right away: later calls may overwrite $? and $!.
    my $wait_status = $?;
    my $os_error    = "$!";

    return @cmd_output if $wait_status == 0;

    #### Decode the wait status: -1 means the command never started,
    #### the low 7 bits hold a terminating signal, and the exit code is
    #### in the high byte (hence the right shift).
    my $reason;
    if ($wait_status == -1) {
        $reason = "failed to start ($os_error)";
    }
    elsif ($wait_status & 127) {
        $reason = sprintf("terminated by signal %d", $wait_status & 127);
    }
    else {
        $reason = sprintf("exit status %d", $wait_status >> 8);
    }

    my $details = join("", @cmd_output);
    $details =~ s/\s+\z//;

    my $message = "Error executing command $cmd: $reason";
    $message .= "\n$details" if length $details;

    die "$message\n";
}
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79
8eb7d93f7e58 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 1;