Mercurial > repos > jjohnson > crest
diff getUniqSV.pl @ 0:acc8d8bfeb9a
Uploaded
author | jjohnson |
---|---|
date | Wed, 08 Feb 2012 16:59:24 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getUniqSV.pl Wed Feb 08 16:59:24 2012 -0500 @@ -0,0 +1,222 @@ +#!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w + +use strict; +use Carp; +use Getopt::Long; +use English; +use Pod::Usage; +use Data::Dumper; +use DBI; +use DBD::mysql; + +my $db = 'hg18'; +my $port = 3306; +my $host = '10.4.19.125'; +my $USER = "sjXuser"; + +my ( $help, $man, $version, $usage ); +my ($in, $normal, $out); +my $optionOK = GetOptions( + 'd|i|in|input=s' => \$in, + 'g|normal=s' => \$normal, + 'o|out|output=s' => \$out, + 'h|help|?' => \$help, + 'man' => \$man, + 'usage' => \$usage, + 'v|version' => \$version, +); + +pod2usage(2) if($man); +pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" ) + if($help or $usage); +pod2usage( -verbose => 99, -sections => "VERSION") if($version); + +croak "Missing input file" if(!$in); +open STDOUT, ">$out" if($out); +open my $IN, "<$in" or croak "can't open $in: $OS_ERROR"; +open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal); + +my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or + die "Can't connect to MySQL database: $DBI::errstr\n"; + +my @SV; +my @normal_SV; +if($normal) { + while(my $line = <$NORMAL>) { + chomp $line; + my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line; + next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); + push @normal_SV, [$chrA, $posA, $chrB, $posB, $type]; + } +} +while( my $line = <$IN> ) { + chomp $line; + my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line; + next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample + push @normal_SV, [$chrA, $posA, $chrB, $posB, $type]; + print STDOUT $line; + if($type eq 'DEL') { + my $tmp = get_gene_info($chrA, $posA, $posB); + print STDOUT "\t", $tmp if($tmp); + } + else { + print STDOUT "\t", get_gene_info($chrA, $posA, $posA); + print STDOUT "\t", get_gene_info($chrB, $posB, $posB); + } + print STDOUT "\n"; +} +close $IN; +close STDOUT if($out); +$dbh->disconnect; +exit(0); + +sub search_sv { + my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_; + + foreach my $sv (@{$r_SV}) { + return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 && + $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type); + return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 && + $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type); + return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 && + $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX'); + return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 && + $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 100 && $sv->[4] eq $type && $type eq 'CTX'); + } + return; +} + +sub get_gene_info { + my ($chr, $st, $ed) = @_; + $chr = 'chr' . $chr if($chr !~ m/chr/); + my $rtn=""; + my $sth = $dbh->prepare("select distinct geneName + from refFlat + where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd) + order by txStart" + ); + $sth->execute(); + my $tbl_ref = $sth->fetchall_arrayref(); + foreach my $r (@{$tbl_ref}) { + if($rtn eq '') { + $rtn = $r->[0]; + } + else { + $rtn = $rtn . ", " . $r->[0]; + } + } + return $rtn; +} +=head1 NAME + +<application name> - <One-line description of application's purpose> + + +=head1 VERSION + +The initial template usually just has: + +This documentation refers to <application name> version 0.0.1. + + +=head1 USAGE + + # Brief working invocation example(s) here showing the most common usage(s) + + # This section will be as far as many users ever read, + # so make it as educational and exemplary as possible. +=head1 REQUIRED ARGUMENTS + +A complete list of every argument that must appear on the command line. +when the application is invoked, explaining what each of them does, any +restrictions on where each one may appear (i.e., flags that must appear +before or after filenames), and how the various arguments and options +may interact (e.g., mutual exclusions, required combinations, etc.) + +If all of the application's arguments are optional, this section +may be omitted entirely. + +=head1 OPTIONS + +A complete list of every available option with which the application +can be invoked, explaining what each does, and listing any restrictions, +or interactions. + +If the application has no options, this section may be omitted entirely. + + +=head1 DESCRIPTION + +A full description of the application and its features. +May include numerous subsections (i.e., =head2, =head3, etc.). + + +=head1 DIAGNOSTICS + +A list of every error and warning message that the application can generate +(even the ones that will "never happen"), with a full explanation of each +problem, one or more likely causes, and any suggested remedies. If the +application generates exit status codes (e.g., under Unix), then list the exit +status associated with each error. + +=head1 CONFIGURATION AND ENVIRONMENT + +A full explanation of any configuration system(s) used by the application, +including the names and locations of any configuration files, and the +meaning of any environment variables or properties that can be set. These +descriptions must also include details of any configuration language used. + + +=head1 DEPENDENCIES + +A list of all the other modules that this module relies upon, including any +restrictions on versions, and an indication of whether these required modules are +part of the standard Perl distribution, part of the module's distribution, +or must be installed separately. + + +=head1 INCOMPATIBILITIES + +A list of any modules that this module cannot be used in conjunction with. +This may be due to name conflicts in the interface, or competition for +system or program resources, or due to internal limitations of Perl +(for example, many modules that use source code filters are mutually +incompatible). + + +=head1 BUGS AND LIMITATIONS + +A list of known problems with the module, together with some indication of +whether they are likely to be fixed in an upcoming release. + +Also a list of restrictions on the features the module does provide: +data types that cannot be handled, performance issues and the circumstances +in which they may arise, practical limitations on the size of data sets, +special cases that are not (yet) handled, etc. + +The initial template usually just has: + +There are no known bugs in this module. +Please report problems to <Maintainer name(s)> (<contact address>) +Patches are welcome. + +=head1 AUTHOR + +<Author name(s)> (<contact address>) + + + +=head1 LICENCE AND COPYRIGHT + +Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved. + +followed by whatever licence you wish to release it under. +For Perl code that is often just: + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. See L<perlartistic>. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +