view getUniqSV.pl @ 0:acc8d8bfeb9a

Uploaded
author jjohnson
date Wed, 08 Feb 2012 16:59:24 -0500
parents
children
line wrap: on
line source

#!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w

use strict;
use Carp;
use Getopt::Long;
use English;
use Pod::Usage;
use Data::Dumper;
use DBI;
use DBD::mysql;

my $db = 'hg18';
my $port = 3306;
my $host = '10.4.19.125';
my $USER = "sjXuser";

my ( $help, $man, $version, $usage );
my ($in, $normal, $out);
my $optionOK = GetOptions(
	'd|i|in|input=s'	=> \$in,
	'g|normal=s'	    => \$normal,
	'o|out|output=s'	=> \$out,
	'h|help|?'		=> \$help,
	'man'			=> \$man,
	'usage'			=> \$usage,
	'v|version'		=> \$version,
);

pod2usage(2) if($man);
pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" ) 
	if($help or $usage);
pod2usage( -verbose => 99, -sections => "VERSION") if($version);

croak "Missing input file" if(!$in);
open STDOUT, ">$out" if($out);
open my $IN, "<$in" or croak "can't open $in: $OS_ERROR";
open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal);

my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or
	die "Can't connect to MySQL database: $DBI::errstr\n";

my @SV;
my @normal_SV;
if($normal) {
	while(my $line = <$NORMAL>) {
		chomp $line;
		my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
		next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type));
		push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
	}
}
while( my $line = <$IN> ) {
	chomp $line;
	my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
	next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample
	push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
	print STDOUT $line;
	if($type eq 'DEL') {
		my $tmp = get_gene_info($chrA, $posA, $posB);
		print STDOUT "\t", $tmp if($tmp);
	}
	else {
		print STDOUT "\t", get_gene_info($chrA, $posA, $posA);
		print STDOUT "\t", get_gene_info($chrB, $posB, $posB);
	}
	print STDOUT "\n";
}
close $IN;
close STDOUT if($out);
$dbh->disconnect;
exit(0);

sub search_sv {
	my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_;

	foreach my $sv (@{$r_SV}) {
		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 &&
			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type);
		return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 &&
			$sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type);
		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 &&
			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX');
		return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 &&
			$sv->[2] eq $chrA && abs($sv->[3] - $posA)  < 100  && $sv->[4] eq $type && $type eq 'CTX');
	}
	return;
}

sub get_gene_info {
   my ($chr, $st, $ed) = @_;
   $chr = 'chr' . $chr if($chr !~ m/chr/);
	my $rtn="";
   my $sth = $dbh->prepare("select distinct geneName 
                           from refFlat
                           where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd)
                           order by txStart"
                          );
   $sth->execute();
   my $tbl_ref = $sth->fetchall_arrayref();
   foreach my $r (@{$tbl_ref}) {
   		if($rtn eq '') { 
			$rtn = $r->[0];
		} 
		else {
			$rtn = $rtn . ", " .  $r->[0];
		}
	}
	return $rtn;
}
=head1 NAME

<application name> - <One-line description of application's purpose>


=head1 VERSION

The initial template usually just has:

This documentation refers to <application name> version 0.0.1.


=head1 USAGE

    # Brief working invocation example(s) here showing the most common usage(s)

	# This section will be as far as many users ever read,
	# so make it as educational and exemplary as possible.
=head1 REQUIRED ARGUMENTS

A complete list of every argument that must appear on the command line.
when the application  is invoked, explaining what each of them does, any
restrictions on where each one may appear (i.e., flags that must appear
before or after filenames), and how the various arguments and options
may interact (e.g., mutual exclusions, required combinations, etc.)

If all of the application's arguments are optional, this section
may be omitted entirely.

=head1 OPTIONS

A complete list of every available option with which the application
can be invoked, explaining what each does, and listing any restrictions,
or interactions.

If the application has no options, this section may be omitted entirely.


=head1 DESCRIPTION

A full description of the application and its features.
May include numerous subsections (i.e., =head2, =head3, etc.).


=head1 DIAGNOSTICS

A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g., under Unix), then list the exit
status associated with each error.

=head1 CONFIGURATION AND ENVIRONMENT

A full explanation of any configuration system(s) used by the application,
including the names and locations of any configuration files, and the
meaning of any environment variables or properties that can be set. These
descriptions must also include details of any configuration language used.


=head1 DEPENDENCIES

A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication of whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.


=head1 INCOMPATIBILITIES

A list of any modules that this module cannot be used in conjunction with.
This may be due to name conflicts in the interface, or competition for
system or program resources, or due to internal limitations of Perl
(for example, many modules that use source code filters are mutually
incompatible).


=head1 BUGS AND LIMITATIONS

A list of known problems with the module, together with some indication of
whether they are likely to be fixed in an upcoming release.

Also a list of restrictions on the features the module does provide:
data types that cannot be handled, performance issues and the circumstances
in which they may arise, practical limitations on the size of data sets,
special cases that are not (yet) handled, etc.

The initial template usually just has:

There are no known bugs in this module.
Please report problems to <Maintainer name(s)>  (<contact address>)
Patches are welcome.

=head1 AUTHOR

<Author name(s)> (<contact address>)



=head1 LICENCE AND COPYRIGHT

Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.

followed by whatever licence you wish to release it under.
For Perl code that is often just:

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.