| 
0
 | 
     1 #!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 use strict;
 | 
| 
 | 
     4 use Carp;
 | 
| 
 | 
     5 use Getopt::Long;
 | 
| 
 | 
     6 use English;
 | 
| 
 | 
     7 use Pod::Usage;
 | 
| 
 | 
     8 use Data::Dumper;
 | 
| 
 | 
     9 use DBI;
 | 
| 
 | 
    10 use DBD::mysql;
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 my $db = 'hg18';
 | 
| 
 | 
    13 my $port = 3306;
 | 
| 
 | 
    14 my $host = '10.4.19.125';
 | 
| 
 | 
    15 my $USER = "sjXuser";
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 my ( $help, $man, $version, $usage );
 | 
| 
 | 
    18 my ($in, $normal, $out);
 | 
| 
 | 
    19 my $optionOK = GetOptions(
 | 
| 
 | 
    20 	'd|i|in|input=s'	=> \$in,
 | 
| 
 | 
    21 	'g|normal=s'	    => \$normal,
 | 
| 
 | 
    22 	'o|out|output=s'	=> \$out,
 | 
| 
 | 
    23 	'h|help|?'		=> \$help,
 | 
| 
 | 
    24 	'man'			=> \$man,
 | 
| 
 | 
    25 	'usage'			=> \$usage,
 | 
| 
 | 
    26 	'v|version'		=> \$version,
 | 
| 
 | 
    27 );
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 pod2usage(2) if($man);
 | 
| 
 | 
    30 pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" ) 
 | 
| 
 | 
    31 	if($help or $usage);
 | 
| 
 | 
    32 pod2usage( -verbose => 99, -sections => "VERSION") if($version);
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 croak "Missing input file" if(!$in);
 | 
| 
 | 
    35 open STDOUT, ">$out" if($out);
 | 
| 
 | 
    36 open my $IN, "<$in" or croak "can't open $in: $OS_ERROR";
 | 
| 
 | 
    37 open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal);
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or
 | 
| 
 | 
    40 	die "Can't connect to MySQL database: $DBI::errstr\n";
 | 
| 
 | 
    41 
 | 
| 
 | 
    42 my @SV;
 | 
| 
 | 
    43 my @normal_SV;
 | 
| 
 | 
    44 if($normal) {
 | 
| 
 | 
    45 	while(my $line = <$NORMAL>) {
 | 
| 
 | 
    46 		chomp $line;
 | 
| 
 | 
    47 		my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
 | 
| 
 | 
    48 		next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type));
 | 
| 
 | 
    49 		push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
 | 
| 
 | 
    50 	}
 | 
| 
 | 
    51 }
 | 
| 
 | 
    52 while( my $line = <$IN> ) {
 | 
| 
 | 
    53 	chomp $line;
 | 
| 
 | 
    54 	my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
 | 
| 
 | 
    55 	next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample
 | 
| 
 | 
    56 	push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
 | 
| 
 | 
    57 	print STDOUT $line;
 | 
| 
 | 
    58 	if($type eq 'DEL') {
 | 
| 
 | 
    59 		my $tmp = get_gene_info($chrA, $posA, $posB);
 | 
| 
 | 
    60 		print STDOUT "\t", $tmp if($tmp);
 | 
| 
 | 
    61 	}
 | 
| 
 | 
    62 	else {
 | 
| 
 | 
    63 		print STDOUT "\t", get_gene_info($chrA, $posA, $posA);
 | 
| 
 | 
    64 		print STDOUT "\t", get_gene_info($chrB, $posB, $posB);
 | 
| 
 | 
    65 	}
 | 
| 
 | 
    66 	print STDOUT "\n";
 | 
| 
 | 
    67 }
 | 
| 
 | 
    68 close $IN;
 | 
| 
 | 
    69 close STDOUT if($out);
 | 
| 
 | 
    70 $dbh->disconnect;
 | 
| 
 | 
    71 exit(0);
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 sub search_sv {
 | 
| 
 | 
    74 	my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_;
 | 
| 
 | 
    75 
 | 
| 
 | 
    76 	foreach my $sv (@{$r_SV}) {
 | 
| 
 | 
    77 		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 &&
 | 
| 
 | 
    78 			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type);
 | 
| 
 | 
    79 		return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 &&
 | 
| 
 | 
    80 			$sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type);
 | 
| 
 | 
    81 		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 &&
 | 
| 
 | 
    82 			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX');
 | 
| 
 | 
    83 		return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 &&
 | 
| 
 | 
    84 			$sv->[2] eq $chrA && abs($sv->[3] - $posA)  < 100  && $sv->[4] eq $type && $type eq 'CTX');
 | 
| 
 | 
    85 	}
 | 
| 
 | 
    86 	return;
 | 
| 
 | 
    87 }
 | 
| 
 | 
    88 
 | 
| 
 | 
    89 sub get_gene_info {
 | 
| 
 | 
    90    my ($chr, $st, $ed) = @_;
 | 
| 
 | 
    91    $chr = 'chr' . $chr if($chr !~ m/chr/);
 | 
| 
 | 
    92 	my $rtn="";
 | 
| 
 | 
    93    my $sth = $dbh->prepare("select distinct geneName 
 | 
| 
 | 
    94                            from refFlat
 | 
| 
 | 
    95                            where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd)
 | 
| 
 | 
    96                            order by txStart"
 | 
| 
 | 
    97                           );
 | 
| 
 | 
    98    $sth->execute();
 | 
| 
 | 
    99    my $tbl_ref = $sth->fetchall_arrayref();
 | 
| 
 | 
   100    foreach my $r (@{$tbl_ref}) {
 | 
| 
 | 
   101    		if($rtn eq '') { 
 | 
| 
 | 
   102 			$rtn = $r->[0];
 | 
| 
 | 
   103 		} 
 | 
| 
 | 
   104 		else {
 | 
| 
 | 
   105 			$rtn = $rtn . ", " .  $r->[0];
 | 
| 
 | 
   106 		}
 | 
| 
 | 
   107 	}
 | 
| 
 | 
   108 	return $rtn;
 | 
| 
 | 
   109 }
 | 
| 
 | 
   110 =head1 NAME
 | 
| 
 | 
   111 
 | 
| 
 | 
   112 <application name> - <One-line description of application's purpose>
 | 
| 
 | 
   113 
 | 
| 
 | 
   114 
 | 
| 
 | 
   115 =head1 VERSION
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 The initial template usually just has:
 | 
| 
 | 
   118 
 | 
| 
 | 
   119 This documentation refers to <application name> version 0.0.1.
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 =head1 USAGE
 | 
| 
 | 
   123 
 | 
| 
 | 
   124     # Brief working invocation example(s) here showing the most common usage(s)
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 	# This section will be as far as many users ever read,
 | 
| 
 | 
   127 	# so make it as educational and exemplary as possible.
 | 
| 
 | 
   128 =head1 REQUIRED ARGUMENTS
 | 
| 
 | 
   129 
 | 
| 
 | 
   130 A complete list of every argument that must appear on the command line.
 | 
| 
 | 
   131 when the application  is invoked, explaining what each of them does, any
 | 
| 
 | 
   132 restrictions on where each one may appear (i.e., flags that must appear
 | 
| 
 | 
   133 before or after filenames), and how the various arguments and options
 | 
| 
 | 
   134 may interact (e.g., mutual exclusions, required combinations, etc.)
 | 
| 
 | 
   135 
 | 
| 
 | 
   136 If all of the application's arguments are optional, this section
 | 
| 
 | 
   137 may be omitted entirely.
 | 
| 
 | 
   138 
 | 
| 
 | 
   139 =head1 OPTIONS
 | 
| 
 | 
   140 
 | 
| 
 | 
   141 A complete list of every available option with which the application
 | 
| 
 | 
   142 can be invoked, explaining what each does, and listing any restrictions,
 | 
| 
 | 
   143 or interactions.
 | 
| 
 | 
   144 
 | 
| 
 | 
   145 If the application has no options, this section may be omitted entirely.
 | 
| 
 | 
   146 
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 =head1 DESCRIPTION
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 A full description of the application and its features.
 | 
| 
 | 
   151 May include numerous subsections (i.e., =head2, =head3, etc.).
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 =head1 DIAGNOSTICS
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 A list of every error and warning message that the application can generate
 | 
| 
 | 
   157 (even the ones that will "never happen"), with a full explanation of each
 | 
| 
 | 
   158 problem, one or more likely causes, and any suggested remedies. If the
 | 
| 
 | 
   159 application generates exit status codes (e.g., under Unix), then list the exit
 | 
| 
 | 
   160 status associated with each error.
 | 
| 
 | 
   161 
 | 
| 
 | 
   162 =head1 CONFIGURATION AND ENVIRONMENT
 | 
| 
 | 
   163 
 | 
| 
 | 
   164 A full explanation of any configuration system(s) used by the application,
 | 
| 
 | 
   165 including the names and locations of any configuration files, and the
 | 
| 
 | 
   166 meaning of any environment variables or properties that can be set. These
 | 
| 
 | 
   167 descriptions must also include details of any configuration language used.
 | 
| 
 | 
   168 
 | 
| 
 | 
   169 
 | 
| 
 | 
   170 =head1 DEPENDENCIES
 | 
| 
 | 
   171 
 | 
| 
 | 
   172 A list of all the other modules that this module relies upon, including any
 | 
| 
 | 
   173 restrictions on versions, and an indication of whether these required modules are
 | 
| 
 | 
   174 part of the standard Perl distribution, part of the module's distribution,
 | 
| 
 | 
   175 or must be installed separately.
 | 
| 
 | 
   176 
 | 
| 
 | 
   177 
 | 
| 
 | 
   178 =head1 INCOMPATIBILITIES
 | 
| 
 | 
   179 
 | 
| 
 | 
   180 A list of any modules that this module cannot be used in conjunction with.
 | 
| 
 | 
   181 This may be due to name conflicts in the interface, or competition for
 | 
| 
 | 
   182 system or program resources, or due to internal limitations of Perl
 | 
| 
 | 
   183 (for example, many modules that use source code filters are mutually
 | 
| 
 | 
   184 incompatible).
 | 
| 
 | 
   185 
 | 
| 
 | 
   186 
 | 
| 
 | 
   187 =head1 BUGS AND LIMITATIONS
 | 
| 
 | 
   188 
 | 
| 
 | 
   189 A list of known problems with the module, together with some indication of
 | 
| 
 | 
   190 whether they are likely to be fixed in an upcoming release.
 | 
| 
 | 
   191 
 | 
| 
 | 
   192 Also a list of restrictions on the features the module does provide:
 | 
| 
 | 
   193 data types that cannot be handled, performance issues and the circumstances
 | 
| 
 | 
   194 in which they may arise, practical limitations on the size of data sets,
 | 
| 
 | 
   195 special cases that are not (yet) handled, etc.
 | 
| 
 | 
   196 
 | 
| 
 | 
   197 The initial template usually just has:
 | 
| 
 | 
   198 
 | 
| 
 | 
   199 There are no known bugs in this module.
 | 
| 
 | 
   200 Please report problems to <Maintainer name(s)>  (<contact address>)
 | 
| 
 | 
   201 Patches are welcome.
 | 
| 
 | 
   202 
 | 
| 
 | 
   203 =head1 AUTHOR
 | 
| 
 | 
   204 
 | 
| 
 | 
   205 <Author name(s)> (<contact address>)
 | 
| 
 | 
   206 
 | 
| 
 | 
   207 
 | 
| 
 | 
   208 
 | 
| 
 | 
   209 =head1 LICENCE AND COPYRIGHT
 | 
| 
 | 
   210 
 | 
| 
 | 
   211 Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
 | 
| 
 | 
   212 
 | 
| 
 | 
   213 followed by whatever licence you wish to release it under.
 | 
| 
 | 
   214 For Perl code that is often just:
 | 
| 
 | 
   215 
 | 
| 
 | 
   216 This module is free software; you can redistribute it and/or
 | 
| 
 | 
   217 modify it under the same terms as Perl itself. See L<perlartistic>.
 | 
| 
 | 
   218 
 | 
| 
 | 
   219 This program is distributed in the hope that it will be useful,
 | 
| 
 | 
   220 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
| 
 | 
   221 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | 
| 
 | 
   222 
 |