comparison 2.4/src/ReadCluster.pl @ 16:8eb7d93f7e58 draft

Uploaded
author plus91-technologies-pvt-ltd
date Sat, 31 May 2014 11:23:36 -0400
parents e3609c8714fb
children
comparison
equal deleted inserted replaced
15:da93b6f4d684 16:8eb7d93f7e58
1 #!/usr/bin/perl
2
3 =head1 NAME
4 ReadCluster.pl
5
6 =head1 SYNOPSIS
7
8 USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4]
9
10 =head1 OPTIONS
11
12 B<--input,-i>
13 Input file
14
15 B<--output,-o>
16 output prefix
17
18 B<--window, -w>
19 Window size
20
21 B<--minClusterSize, -m>
22 Min size of cluster
23
24 B<--help,-h>
25 This help message
26
27 =head1 DESCRIPTION
28
29
30 =head1 INPUT
31
32
33 =head1 OUTPUT
34
35
36 =head1 CONTACT
37 Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com
38
39
40 ==head1 EXAMPLE
41 ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX
42
43 =cut
44
45 use strict;
46 use warnings;
47 use Data::Dumper;
48 use DBI;
49 use Pod::Usage;
50 use Scalar::Util qw(looks_like_number);
51 use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
52
53 my %options = ();
54 my $results = GetOptions (\%options,
55 'input|i=s',
56 'output|o=s',
57 'window|w=s',
58 'minClusterSize|m=s',
59 'help|h') || pod2usage();
60
61 ## display documentation
62 if( $options{'help'} ){
63 pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} );
64 }
65 #############################################################################
66 ## make sure everything passed was peachy
67 &check_parameters(\%options);
68
69 my $r1_start = 0;
70 my $r2_start = 0;
71 my $r1_end = $r1_start + $options{window};
72 my $r2_end = $r2_start + $options{window};
73 my $r1_chr = "";
74 my $r2_chr = "";
75
76 my @cluster = ();
77
78 open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n";
79 open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n";
80 open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n";
81
82 while (<FHD>){
83 chomp $_;
84
85 #skip processing lines starting with @ just print to output file.
86 if ($_ =~ /^@/){
87 print INTRA $_."\n";
88 print INTER $_."\n";
89 next;
90 }
91 #print "$_\n";
92 check_sequence($_);
93 }
94
95 close(FHD);
96 close(INTRA);
97 close(INTER);
98
99 exit(0);
100
101 #############################################################################
102 sub check_parameters {
103 my $options = shift;
104
105 my @required = ("input", "output");
106
107 foreach my $key (@required) {
108 unless ($options{$key}) {
109 print STDERR "ARG: $key is required\n";
110 pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR});
111 exit(-1);
112 }
113 }
114
115 unless($options{window}) { $options{window} = 10000; }
116 unless($options{minClusterSize}) { $options{minClusterSize} = 4; }
117 }
118
119 #############################################################################
120 sub check_sequence {
121 my $line = shift;
122
123 my @data = split(/\t/, $line);
124
125 ## check if mates are within the window.
126 if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) &&
127 ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) {
128
129 ## if minClusterSize is reached output
130 if (scalar(@cluster) >= $options{minClusterSize}) {
131
132 ## if chr are the same then print intra-chr else inter-chr
133 if ($data[6] =~ /=/) {
134 print INTRA $line."\n";
135 } else {
136 print INTER $line."\n";
137 }
138 } else {
139 push @cluster, $line;
140 }
141 } else {
142
143 if (scalar(@cluster) >= $options{minClusterSize}) {
144 dumpCluster(@cluster);
145 }
146
147 @cluster = ();
148 $r1_start = $data[3];
149 $r2_start = $data[7];
150 $r1_end = $r1_start + $options{window};
151 $r2_end = $r2_start + $options{window};
152 $r1_chr = $data[2];
153 $r2_chr = $data[6];
154 }
155 }
156
157 #############################################################################
158 sub inWindow {
159 my $coord = shift;
160 my $read = shift;
161
162 my $start = 0;
163 my $end = 0;
164
165 if ($read == 1) {
166 $start = $r1_start;
167 $end = $r1_end;
168 } else {
169 $start = $r2_start;
170 $end = $r2_end;
171 }
172
173 if (($coord > $start) && ($coord < $end)){
174 return 1;
175 } else { return 0; }
176 }
177
178 #############################################################################
179 sub dumpCluster {
180 my @cluster = shift;
181
182 foreach (@cluster){
183 my @data = split(/\t/, $_);
184
185 if ($data[6] =~ /=/) {
186 print INTRA $_."\n";
187 } else {
188 print INTER $_."\n";
189 }
190 }
191 }