Mercurial > repos > jjohnson > crest
comparison getUniqSV.pl @ 0:acc8d8bfeb9a
Uploaded
author | jjohnson |
---|---|
date | Wed, 08 Feb 2012 16:59:24 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc8d8bfeb9a |
---|---|
1 #!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w | |
2 | |
3 use strict; | |
4 use Carp; | |
5 use Getopt::Long; | |
6 use English; | |
7 use Pod::Usage; | |
8 use Data::Dumper; | |
9 use DBI; | |
10 use DBD::mysql; | |
11 | |
12 my $db = 'hg18'; | |
13 my $port = 3306; | |
14 my $host = '10.4.19.125'; | |
15 my $USER = "sjXuser"; | |
16 | |
17 my ( $help, $man, $version, $usage ); | |
18 my ($in, $normal, $out); | |
19 my $optionOK = GetOptions( | |
20 'd|i|in|input=s' => \$in, | |
21 'g|normal=s' => \$normal, | |
22 'o|out|output=s' => \$out, | |
23 'h|help|?' => \$help, | |
24 'man' => \$man, | |
25 'usage' => \$usage, | |
26 'v|version' => \$version, | |
27 ); | |
28 | |
29 pod2usage(2) if($man); | |
30 pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" ) | |
31 if($help or $usage); | |
32 pod2usage( -verbose => 99, -sections => "VERSION") if($version); | |
33 | |
34 croak "Missing input file" if(!$in); | |
35 open STDOUT, ">$out" if($out); | |
36 open my $IN, "<$in" or croak "can't open $in: $OS_ERROR"; | |
37 open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal); | |
38 | |
39 my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or | |
40 die "Can't connect to MySQL database: $DBI::errstr\n"; | |
41 | |
42 my @SV; | |
43 my @normal_SV; | |
44 if($normal) { | |
45 while(my $line = <$NORMAL>) { | |
46 chomp $line; | |
47 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line; | |
48 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); | |
49 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type]; | |
50 } | |
51 } | |
52 while( my $line = <$IN> ) { | |
53 chomp $line; | |
54 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line; | |
55 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample | |
56 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type]; | |
57 print STDOUT $line; | |
58 if($type eq 'DEL') { | |
59 my $tmp = get_gene_info($chrA, $posA, $posB); | |
60 print STDOUT "\t", $tmp if($tmp); | |
61 } | |
62 else { | |
63 print STDOUT "\t", get_gene_info($chrA, $posA, $posA); | |
64 print STDOUT "\t", get_gene_info($chrB, $posB, $posB); | |
65 } | |
66 print STDOUT "\n"; | |
67 } | |
68 close $IN; | |
69 close STDOUT if($out); | |
70 $dbh->disconnect; | |
71 exit(0); | |
72 | |
73 sub search_sv { | |
74 my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_; | |
75 | |
76 foreach my $sv (@{$r_SV}) { | |
77 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 && | |
78 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type); | |
79 return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 && | |
80 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type); | |
81 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 && | |
82 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX'); | |
83 return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 && | |
84 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 100 && $sv->[4] eq $type && $type eq 'CTX'); | |
85 } | |
86 return; | |
87 } | |
88 | |
89 sub get_gene_info { | |
90 my ($chr, $st, $ed) = @_; | |
91 $chr = 'chr' . $chr if($chr !~ m/chr/); | |
92 my $rtn=""; | |
93 my $sth = $dbh->prepare("select distinct geneName | |
94 from refFlat | |
95 where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd) | |
96 order by txStart" | |
97 ); | |
98 $sth->execute(); | |
99 my $tbl_ref = $sth->fetchall_arrayref(); | |
100 foreach my $r (@{$tbl_ref}) { | |
101 if($rtn eq '') { | |
102 $rtn = $r->[0]; | |
103 } | |
104 else { | |
105 $rtn = $rtn . ", " . $r->[0]; | |
106 } | |
107 } | |
108 return $rtn; | |
109 } | |
110 =head1 NAME | |
111 | |
112 <application name> - <One-line description of application's purpose> | |
113 | |
114 | |
115 =head1 VERSION | |
116 | |
117 The initial template usually just has: | |
118 | |
119 This documentation refers to <application name> version 0.0.1. | |
120 | |
121 | |
122 =head1 USAGE | |
123 | |
124 # Brief working invocation example(s) here showing the most common usage(s) | |
125 | |
126 # This section will be as far as many users ever read, | |
127 # so make it as educational and exemplary as possible. | |
128 =head1 REQUIRED ARGUMENTS | |
129 | |
130 A complete list of every argument that must appear on the command line. | |
131 when the application is invoked, explaining what each of them does, any | |
132 restrictions on where each one may appear (i.e., flags that must appear | |
133 before or after filenames), and how the various arguments and options | |
134 may interact (e.g., mutual exclusions, required combinations, etc.) | |
135 | |
136 If all of the application's arguments are optional, this section | |
137 may be omitted entirely. | |
138 | |
139 =head1 OPTIONS | |
140 | |
141 A complete list of every available option with which the application | |
142 can be invoked, explaining what each does, and listing any restrictions, | |
143 or interactions. | |
144 | |
145 If the application has no options, this section may be omitted entirely. | |
146 | |
147 | |
148 =head1 DESCRIPTION | |
149 | |
150 A full description of the application and its features. | |
151 May include numerous subsections (i.e., =head2, =head3, etc.). | |
152 | |
153 | |
154 =head1 DIAGNOSTICS | |
155 | |
156 A list of every error and warning message that the application can generate | |
157 (even the ones that will "never happen"), with a full explanation of each | |
158 problem, one or more likely causes, and any suggested remedies. If the | |
159 application generates exit status codes (e.g., under Unix), then list the exit | |
160 status associated with each error. | |
161 | |
162 =head1 CONFIGURATION AND ENVIRONMENT | |
163 | |
164 A full explanation of any configuration system(s) used by the application, | |
165 including the names and locations of any configuration files, and the | |
166 meaning of any environment variables or properties that can be set. These | |
167 descriptions must also include details of any configuration language used. | |
168 | |
169 | |
170 =head1 DEPENDENCIES | |
171 | |
172 A list of all the other modules that this module relies upon, including any | |
173 restrictions on versions, and an indication of whether these required modules are | |
174 part of the standard Perl distribution, part of the module's distribution, | |
175 or must be installed separately. | |
176 | |
177 | |
178 =head1 INCOMPATIBILITIES | |
179 | |
180 A list of any modules that this module cannot be used in conjunction with. | |
181 This may be due to name conflicts in the interface, or competition for | |
182 system or program resources, or due to internal limitations of Perl | |
183 (for example, many modules that use source code filters are mutually | |
184 incompatible). | |
185 | |
186 | |
187 =head1 BUGS AND LIMITATIONS | |
188 | |
189 A list of known problems with the module, together with some indication of | |
190 whether they are likely to be fixed in an upcoming release. | |
191 | |
192 Also a list of restrictions on the features the module does provide: | |
193 data types that cannot be handled, performance issues and the circumstances | |
194 in which they may arise, practical limitations on the size of data sets, | |
195 special cases that are not (yet) handled, etc. | |
196 | |
197 The initial template usually just has: | |
198 | |
199 There are no known bugs in this module. | |
200 Please report problems to <Maintainer name(s)> (<contact address>) | |
201 Patches are welcome. | |
202 | |
203 =head1 AUTHOR | |
204 | |
205 <Author name(s)> (<contact address>) | |
206 | |
207 | |
208 | |
209 =head1 LICENCE AND COPYRIGHT | |
210 | |
211 Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved. | |
212 | |
213 followed by whatever licence you wish to release it under. | |
214 For Perl code that is often just: | |
215 | |
216 This module is free software; you can redistribute it and/or | |
217 modify it under the same terms as Perl itself. See L<perlartistic>. | |
218 | |
219 This program is distributed in the hope that it will be useful, | |
220 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
221 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
222 |