Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
diff 2.4/src/ReadCluster.pl @ 16:8eb7d93f7e58 draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Sat, 31 May 2014 11:23:36 -0400 |
parents | e3609c8714fb |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2.4/src/ReadCluster.pl Sat May 31 11:23:36 2014 -0400 @@ -0,0 +1,191 @@ +#!/usr/bin/perl + +=head1 NAME + ReadCluster.pl + +=head1 SYNOPSIS + + USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4] + +=head1 OPTIONS + +B<--input,-i> + Input file + +B<--output,-o> + output prefix + +B<--window, -w> + Window size + +B<--minClusterSize, -m> + Min size of cluster + +B<--help,-h> + This help message + +=head1 DESCRIPTION + + +=head1 INPUT + + +=head1 OUTPUT + + +=head1 CONTACT + Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com + + +==head1 EXAMPLE + ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX + +=cut + +use strict; +use warnings; +use Data::Dumper; +use DBI; +use Pod::Usage; +use Scalar::Util qw(looks_like_number); +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through); + +my %options = (); +my $results = GetOptions (\%options, + 'input|i=s', + 'output|o=s', + 'window|w=s', + 'minClusterSize|m=s', + 'help|h') || pod2usage(); + +## display documentation +if( $options{'help'} ){ + pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ); +} +############################################################################# +## make sure everything passed was peachy +&check_parameters(\%options); + +my $r1_start = 0; +my $r2_start = 0; +my $r1_end = $r1_start + $options{window}; +my $r2_end = $r2_start + $options{window}; +my $r1_chr = ""; +my $r2_chr = ""; + +my @cluster = (); + +open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n"; +open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n"; +open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n"; + +while (<FHD>){ + chomp $_; + + #skip processing lines starting with @ just print to output file. + if ($_ =~ /^@/){ + print INTRA $_."\n"; + print INTER $_."\n"; + next; + } +#print "$_\n"; + check_sequence($_); +} + +close(FHD); +close(INTRA); +close(INTER); + +exit(0); + +############################################################################# +sub check_parameters { + my $options = shift; + + my @required = ("input", "output"); + + foreach my $key (@required) { + unless ($options{$key}) { + print STDERR "ARG: $key is required\n"; + pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR}); + exit(-1); + } + } + + unless($options{window}) { $options{window} = 10000; } + unless($options{minClusterSize}) { $options{minClusterSize} = 4; } +} + +############################################################################# +sub check_sequence { + my $line = shift; + + my @data = split(/\t/, $line); + + ## check if mates are within the window. + if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) && + ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) { + + ## if minClusterSize is reached output + if (scalar(@cluster) >= $options{minClusterSize}) { + + ## if chr are the same then print intra-chr else inter-chr + if ($data[6] =~ /=/) { + print INTRA $line."\n"; + } else { + print INTER $line."\n"; + } + } else { + push @cluster, $line; + } + } else { + + if (scalar(@cluster) >= $options{minClusterSize}) { + dumpCluster(@cluster); + } + + @cluster = (); + $r1_start = $data[3]; + $r2_start = $data[7]; + $r1_end = $r1_start + $options{window}; + $r2_end = $r2_start + $options{window}; + $r1_chr = $data[2]; + $r2_chr = $data[6]; + } +} + +############################################################################# +sub inWindow { + my $coord = shift; + my $read = shift; + + my $start = 0; + my $end = 0; + + if ($read == 1) { + $start = $r1_start; + $end = $r1_end; + } else { + $start = $r2_start; + $end = $r2_end; + } + + if (($coord > $start) && ($coord < $end)){ + return 1; + } else { return 0; } +} + +############################################################################# +sub dumpCluster { + my @cluster = shift; + + foreach (@cluster){ + my @data = split(/\t/, $_); + + if ($data[6] =~ /=/) { + print INTRA $_."\n"; + } else { + print INTER $_."\n"; + } + } +}