diff 2.4/library/LevD.pm @ 13:e3609c8714fb draft

Uploaded
author plus91-technologies-pvt-ltd
date Fri, 30 May 2014 03:37:55 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2.4/library/LevD.pm	Fri May 30 03:37:55 2014 -0400
@@ -0,0 +1,80 @@
+package LevD;
+
+use lib "/data2/bsi/reference/softsearch/lib/perl5";
+use strict;
+use warnings;
+use Data::Dumper;
+use String::Approx 'adist';
+use String::Approx 'adistr';
+use String::Approx 'aindex';
+
+my $WINDOW_SIZE = 100;
+
+sub new {
+	my ($class, $file) = @_;
+    my $self = {};
+
+ 	bless($self,$class);
+	$self->init();
+
+	return $self;
+}
+
+sub init {
+	my ($self) = @_;
+
+	#### default values.
+	$self->{index} = 0;
+	$self->{relative_edit_dist} = 0;
+	$self->{edit_dist} = 0;
+}
+
+sub search {
+	my ($self, $clip, $chr, $start, $stop, $ref) = @_;
+
+	if (! -s $ref) {
+		die "ERROR: Reference file $ref now found\n";
+	}
+
+	#### extact seq from reference file.
+	my $target = $chr .":". $start ."-". $stop;
+	my $cmd = "samtools faidx $ref $target";
+
+	my @output = $self->_run_system_cmd($cmd);
+
+	#### depending on ref file format seq could be on multiple lines
+	#### concatinate all except for the header in one line.
+	#### e.g:
+	#### >chr1:8222999-8223099
+	#### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
+	#### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
+	my $seq = join("", @output[1..$#output]);
+
+	#### remove new line character
+	$seq =~ s/\n//g;
+
+	#### find number of mismatches and start index
+	#### of clip to be searched against target seq.
+	$self->{relative_edit_dist} = adistr($clip, $seq);
+	$self->{edit_dist} = adist($clip, $seq);
+	$self->{index} = aindex($clip, $seq);
+}
+
+sub _run_system_cmd {
+	my ($self, $cmd) = @_;
+	my @cmd_output;
+
+	eval {
+		@cmd_output = qx{$cmd 2>&1};
+		if ( ($? << 8) != 0 ) {
+			die "@cmd_output";
+		}
+	};
+	if ($@) {
+		die "Error executing command $cmd: $@";
+	}
+
+	return @cmd_output;
+}
+
+1;