0
|
1 #!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w
|
|
2
|
|
3 use strict;
|
|
4 use Carp;
|
|
5 use Getopt::Long;
|
|
6 use English;
|
|
7 use Pod::Usage;
|
|
8 use Data::Dumper;
|
|
9 use DBI;
|
|
10 use DBD::mysql;
|
|
11
|
|
12 my $db = 'hg18';
|
|
13 my $port = 3306;
|
|
14 my $host = '10.4.19.125';
|
|
15 my $USER = "sjXuser";
|
|
16
|
|
17 my ( $help, $man, $version, $usage );
|
|
18 my ($in, $normal, $out);
|
|
19 my $optionOK = GetOptions(
|
|
20 'd|i|in|input=s' => \$in,
|
|
21 'g|normal=s' => \$normal,
|
|
22 'o|out|output=s' => \$out,
|
|
23 'h|help|?' => \$help,
|
|
24 'man' => \$man,
|
|
25 'usage' => \$usage,
|
|
26 'v|version' => \$version,
|
|
27 );
|
|
28
|
|
29 pod2usage(2) if($man);
|
|
30 pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" )
|
|
31 if($help or $usage);
|
|
32 pod2usage( -verbose => 99, -sections => "VERSION") if($version);
|
|
33
|
|
34 croak "Missing input file" if(!$in);
|
|
35 open STDOUT, ">$out" if($out);
|
|
36 open my $IN, "<$in" or croak "can't open $in: $OS_ERROR";
|
|
37 open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal);
|
|
38
|
|
39 my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or
|
|
40 die "Can't connect to MySQL database: $DBI::errstr\n";
|
|
41
|
|
42 my @SV;
|
|
43 my @normal_SV;
|
|
44 if($normal) {
|
|
45 while(my $line = <$NORMAL>) {
|
|
46 chomp $line;
|
|
47 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
|
|
48 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type));
|
|
49 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
|
|
50 }
|
|
51 }
|
|
52 while( my $line = <$IN> ) {
|
|
53 chomp $line;
|
|
54 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
|
|
55 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample
|
|
56 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
|
|
57 print STDOUT $line;
|
|
58 if($type eq 'DEL') {
|
|
59 my $tmp = get_gene_info($chrA, $posA, $posB);
|
|
60 print STDOUT "\t", $tmp if($tmp);
|
|
61 }
|
|
62 else {
|
|
63 print STDOUT "\t", get_gene_info($chrA, $posA, $posA);
|
|
64 print STDOUT "\t", get_gene_info($chrB, $posB, $posB);
|
|
65 }
|
|
66 print STDOUT "\n";
|
|
67 }
|
|
68 close $IN;
|
|
69 close STDOUT if($out);
|
|
70 $dbh->disconnect;
|
|
71 exit(0);
|
|
72
|
|
73 sub search_sv {
|
|
74 my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_;
|
|
75
|
|
76 foreach my $sv (@{$r_SV}) {
|
|
77 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 &&
|
|
78 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type);
|
|
79 return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 &&
|
|
80 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type);
|
|
81 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 &&
|
|
82 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX');
|
|
83 return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 &&
|
|
84 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 100 && $sv->[4] eq $type && $type eq 'CTX');
|
|
85 }
|
|
86 return;
|
|
87 }
|
|
88
|
|
89 sub get_gene_info {
|
|
90 my ($chr, $st, $ed) = @_;
|
|
91 $chr = 'chr' . $chr if($chr !~ m/chr/);
|
|
92 my $rtn="";
|
|
93 my $sth = $dbh->prepare("select distinct geneName
|
|
94 from refFlat
|
|
95 where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd)
|
|
96 order by txStart"
|
|
97 );
|
|
98 $sth->execute();
|
|
99 my $tbl_ref = $sth->fetchall_arrayref();
|
|
100 foreach my $r (@{$tbl_ref}) {
|
|
101 if($rtn eq '') {
|
|
102 $rtn = $r->[0];
|
|
103 }
|
|
104 else {
|
|
105 $rtn = $rtn . ", " . $r->[0];
|
|
106 }
|
|
107 }
|
|
108 return $rtn;
|
|
109 }
|
|
110 =head1 NAME
|
|
111
|
|
112 <application name> - <One-line description of application's purpose>
|
|
113
|
|
114
|
|
115 =head1 VERSION
|
|
116
|
|
117 The initial template usually just has:
|
|
118
|
|
119 This documentation refers to <application name> version 0.0.1.
|
|
120
|
|
121
|
|
122 =head1 USAGE
|
|
123
|
|
124 # Brief working invocation example(s) here showing the most common usage(s)
|
|
125
|
|
126 # This section will be as far as many users ever read,
|
|
127 # so make it as educational and exemplary as possible.
|
|
128 =head1 REQUIRED ARGUMENTS
|
|
129
|
|
130 A complete list of every argument that must appear on the command line.
|
|
131 when the application is invoked, explaining what each of them does, any
|
|
132 restrictions on where each one may appear (i.e., flags that must appear
|
|
133 before or after filenames), and how the various arguments and options
|
|
134 may interact (e.g., mutual exclusions, required combinations, etc.)
|
|
135
|
|
136 If all of the application's arguments are optional, this section
|
|
137 may be omitted entirely.
|
|
138
|
|
139 =head1 OPTIONS
|
|
140
|
|
141 A complete list of every available option with which the application
|
|
142 can be invoked, explaining what each does, and listing any restrictions,
|
|
143 or interactions.
|
|
144
|
|
145 If the application has no options, this section may be omitted entirely.
|
|
146
|
|
147
|
|
148 =head1 DESCRIPTION
|
|
149
|
|
150 A full description of the application and its features.
|
|
151 May include numerous subsections (i.e., =head2, =head3, etc.).
|
|
152
|
|
153
|
|
154 =head1 DIAGNOSTICS
|
|
155
|
|
156 A list of every error and warning message that the application can generate
|
|
157 (even the ones that will "never happen"), with a full explanation of each
|
|
158 problem, one or more likely causes, and any suggested remedies. If the
|
|
159 application generates exit status codes (e.g., under Unix), then list the exit
|
|
160 status associated with each error.
|
|
161
|
|
162 =head1 CONFIGURATION AND ENVIRONMENT
|
|
163
|
|
164 A full explanation of any configuration system(s) used by the application,
|
|
165 including the names and locations of any configuration files, and the
|
|
166 meaning of any environment variables or properties that can be set. These
|
|
167 descriptions must also include details of any configuration language used.
|
|
168
|
|
169
|
|
170 =head1 DEPENDENCIES
|
|
171
|
|
172 A list of all the other modules that this module relies upon, including any
|
|
173 restrictions on versions, and an indication of whether these required modules are
|
|
174 part of the standard Perl distribution, part of the module's distribution,
|
|
175 or must be installed separately.
|
|
176
|
|
177
|
|
178 =head1 INCOMPATIBILITIES
|
|
179
|
|
180 A list of any modules that this module cannot be used in conjunction with.
|
|
181 This may be due to name conflicts in the interface, or competition for
|
|
182 system or program resources, or due to internal limitations of Perl
|
|
183 (for example, many modules that use source code filters are mutually
|
|
184 incompatible).
|
|
185
|
|
186
|
|
187 =head1 BUGS AND LIMITATIONS
|
|
188
|
|
189 A list of known problems with the module, together with some indication of
|
|
190 whether they are likely to be fixed in an upcoming release.
|
|
191
|
|
192 Also a list of restrictions on the features the module does provide:
|
|
193 data types that cannot be handled, performance issues and the circumstances
|
|
194 in which they may arise, practical limitations on the size of data sets,
|
|
195 special cases that are not (yet) handled, etc.
|
|
196
|
|
197 The initial template usually just has:
|
|
198
|
|
199 There are no known bugs in this module.
|
|
200 Please report problems to <Maintainer name(s)> (<contact address>)
|
|
201 Patches are welcome.
|
|
202
|
|
203 =head1 AUTHOR
|
|
204
|
|
205 <Author name(s)> (<contact address>)
|
|
206
|
|
207
|
|
208
|
|
209 =head1 LICENCE AND COPYRIGHT
|
|
210
|
|
211 Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
|
|
212
|
|
213 followed by whatever licence you wish to release it under.
|
|
214 For Perl code that is often just:
|
|
215
|
|
216 This module is free software; you can redistribute it and/or
|
|
217 modify it under the same terms as Perl itself. See L<perlartistic>.
|
|
218
|
|
219 This program is distributed in the hope that it will be useful,
|
|
220 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
221 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
222
|