Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
comparison 2.4/src/ReadCluster.pl @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:980fce54e60a | 13:e3609c8714fb |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 =head1 NAME | |
4 ReadCluster.pl | |
5 | |
6 =head1 SYNOPSIS | |
7 | |
8 USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4] | |
9 | |
10 =head1 OPTIONS | |
11 | |
12 B<--input,-i> | |
13 Input file | |
14 | |
15 B<--output,-o> | |
16 output prefix | |
17 | |
18 B<--window, -w> | |
19 Window size | |
20 | |
21 B<--minClusterSize, -m> | |
22 Min size of cluster | |
23 | |
24 B<--help,-h> | |
25 This help message | |
26 | |
27 =head1 DESCRIPTION | |
28 | |
29 | |
30 =head1 INPUT | |
31 | |
32 | |
33 =head1 OUTPUT | |
34 | |
35 | |
36 =head1 CONTACT | |
37 Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com | |
38 | |
39 | |
40 ==head1 EXAMPLE | |
41 ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX | |
42 | |
43 =cut | |
44 | |
45 use strict; | |
46 use warnings; | |
47 use Data::Dumper; | |
48 use DBI; | |
49 use Pod::Usage; | |
50 use Scalar::Util qw(looks_like_number); | |
51 use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through); | |
52 | |
53 my %options = (); | |
54 my $results = GetOptions (\%options, | |
55 'input|i=s', | |
56 'output|o=s', | |
57 'window|w=s', | |
58 'minClusterSize|m=s', | |
59 'help|h') || pod2usage(); | |
60 | |
61 ## display documentation | |
62 if( $options{'help'} ){ | |
63 pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ); | |
64 } | |
65 ############################################################################# | |
66 ## make sure everything passed was peachy | |
67 &check_parameters(\%options); | |
68 | |
69 my $r1_start = 0; | |
70 my $r2_start = 0; | |
71 my $r1_end = $r1_start + $options{window}; | |
72 my $r2_end = $r2_start + $options{window}; | |
73 my $r1_chr = ""; | |
74 my $r2_chr = ""; | |
75 | |
76 my @cluster = (); | |
77 | |
78 open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n"; | |
79 open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n"; | |
80 open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n"; | |
81 | |
82 while (<FHD>){ | |
83 chomp $_; | |
84 | |
85 #skip processing lines starting with @ just print to output file. | |
86 if ($_ =~ /^@/){ | |
87 print INTRA $_."\n"; | |
88 print INTER $_."\n"; | |
89 next; | |
90 } | |
91 #print "$_\n"; | |
92 check_sequence($_); | |
93 } | |
94 | |
95 close(FHD); | |
96 close(INTRA); | |
97 close(INTER); | |
98 | |
99 exit(0); | |
100 | |
101 ############################################################################# | |
102 sub check_parameters { | |
103 my $options = shift; | |
104 | |
105 my @required = ("input", "output"); | |
106 | |
107 foreach my $key (@required) { | |
108 unless ($options{$key}) { | |
109 print STDERR "ARG: $key is required\n"; | |
110 pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR}); | |
111 exit(-1); | |
112 } | |
113 } | |
114 | |
115 unless($options{window}) { $options{window} = 10000; } | |
116 unless($options{minClusterSize}) { $options{minClusterSize} = 4; } | |
117 } | |
118 | |
119 ############################################################################# | |
120 sub check_sequence { | |
121 my $line = shift; | |
122 | |
123 my @data = split(/\t/, $line); | |
124 | |
125 ## check if mates are within the window. | |
126 if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) && | |
127 ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) { | |
128 | |
129 ## if minClusterSize is reached output | |
130 if (scalar(@cluster) >= $options{minClusterSize}) { | |
131 | |
132 ## if chr are the same then print intra-chr else inter-chr | |
133 if ($data[6] =~ /=/) { | |
134 print INTRA $line."\n"; | |
135 } else { | |
136 print INTER $line."\n"; | |
137 } | |
138 } else { | |
139 push @cluster, $line; | |
140 } | |
141 } else { | |
142 | |
143 if (scalar(@cluster) >= $options{minClusterSize}) { | |
144 dumpCluster(@cluster); | |
145 } | |
146 | |
147 @cluster = (); | |
148 $r1_start = $data[3]; | |
149 $r2_start = $data[7]; | |
150 $r1_end = $r1_start + $options{window}; | |
151 $r2_end = $r2_start + $options{window}; | |
152 $r1_chr = $data[2]; | |
153 $r2_chr = $data[6]; | |
154 } | |
155 } | |
156 | |
157 ############################################################################# | |
158 sub inWindow { | |
159 my $coord = shift; | |
160 my $read = shift; | |
161 | |
162 my $start = 0; | |
163 my $end = 0; | |
164 | |
165 if ($read == 1) { | |
166 $start = $r1_start; | |
167 $end = $r1_end; | |
168 } else { | |
169 $start = $r2_start; | |
170 $end = $r2_end; | |
171 } | |
172 | |
173 if (($coord > $start) && ($coord < $end)){ | |
174 return 1; | |
175 } else { return 0; } | |
176 } | |
177 | |
178 ############################################################################# | |
179 sub dumpCluster { | |
180 my @cluster = shift; | |
181 | |
182 foreach (@cluster){ | |
183 my @data = split(/\t/, $_); | |
184 | |
185 if ($data[6] =~ /=/) { | |
186 print INTRA $_."\n"; | |
187 } else { | |
188 print INTER $_."\n"; | |
189 } | |
190 } | |
191 } |