Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
view 2.4/src/ReadCluster.pl @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl =head1 NAME ReadCluster.pl =head1 SYNOPSIS USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4] =head1 OPTIONS B<--input,-i> Input file B<--output,-o> output prefix B<--window, -w> Window size B<--minClusterSize, -m> Min size of cluster B<--help,-h> This help message =head1 DESCRIPTION =head1 INPUT =head1 OUTPUT =head1 CONTACT Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com ==head1 EXAMPLE ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX =cut use strict; use warnings; use Data::Dumper; use DBI; use Pod::Usage; use Scalar::Util qw(looks_like_number); use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through); my %options = (); my $results = GetOptions (\%options, 'input|i=s', 'output|o=s', 'window|w=s', 'minClusterSize|m=s', 'help|h') || pod2usage(); ## display documentation if( $options{'help'} ){ pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ); } ############################################################################# ## make sure everything passed was peachy &check_parameters(\%options); my $r1_start = 0; my $r2_start = 0; my $r1_end = $r1_start + $options{window}; my $r2_end = $r2_start + $options{window}; my $r1_chr = ""; my $r2_chr = ""; my @cluster = (); open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n"; open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n"; open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n"; while (<FHD>){ chomp $_; #skip processing lines starting with @ just print to output file. if ($_ =~ /^@/){ print INTRA $_."\n"; print INTER $_."\n"; next; } #print "$_\n"; check_sequence($_); } close(FHD); close(INTRA); close(INTER); exit(0); ############################################################################# sub check_parameters { my $options = shift; my @required = ("input", "output"); foreach my $key (@required) { unless ($options{$key}) { print STDERR "ARG: $key is required\n"; pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR}); exit(-1); } } unless($options{window}) { $options{window} = 10000; } unless($options{minClusterSize}) { $options{minClusterSize} = 4; } } ############################################################################# sub check_sequence { my $line = shift; my @data = split(/\t/, $line); ## check if mates are within the window. if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) && ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) { ## if minClusterSize is reached output if (scalar(@cluster) >= $options{minClusterSize}) { ## if chr are the same then print intra-chr else inter-chr if ($data[6] =~ /=/) { print INTRA $line."\n"; } else { print INTER $line."\n"; } } else { push @cluster, $line; } } else { if (scalar(@cluster) >= $options{minClusterSize}) { dumpCluster(@cluster); } @cluster = (); $r1_start = $data[3]; $r2_start = $data[7]; $r1_end = $r1_start + $options{window}; $r2_end = $r2_start + $options{window}; $r1_chr = $data[2]; $r2_chr = $data[6]; } } ############################################################################# sub inWindow { my $coord = shift; my $read = shift; my $start = 0; my $end = 0; if ($read == 1) { $start = $r1_start; $end = $r1_end; } else { $start = $r2_start; $end = $r2_end; } if (($coord > $start) && ($coord < $end)){ return 1; } else { return 0; } } ############################################################################# sub dumpCluster { my @cluster = shift; foreach (@cluster){ my @data = split(/\t/, $_); if ($data[6] =~ /=/) { print INTRA $_."\n"; } else { print INTER $_."\n"; } } }