Mercurial > repos > jjohnson > crest
diff countDiff.pl @ 0:acc8d8bfeb9a
Uploaded
author | jjohnson |
---|---|
date | Wed, 08 Feb 2012 16:59:24 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/countDiff.pl Wed Feb 08 16:59:24 2012 -0500 @@ -0,0 +1,74 @@ +#!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w +use strict; +use Carp; +use Getopt::Long; +use English; +use Pod::Usage; +use Data::Dumper; +use Bio::DB::Sam; +use Bio::DB::Sam::Constants; +use Bio::SearchIO; +use Bio::SeqIO; +use File::Temp qw/ tempfile tempdir /; +use File::Spec; +use File::Path; +use File::Copy; +use File::Basename; +use Cwd; + +# bam related variables +# input/output +my($file_d, $file_g); + +my ( $help, $man, $version, $usage ); +my $optionOK = GetOptions( + 'd|diagnostic=s' => \$file_d, + 'g|germline=s' => \$file_g, + 'h|help|?' => \$help, + 'man' => \$man, + 'usage' => \$usage, + 'v|version' => \$version, +); + +pod2usage(-verbose=>2) if($man or $usage); +pod2usage(1) if($help or $version ); + +my $start_dir = getcwd; + +# figure out input file +if(!$file_d or !$file_g ) { + croak "you need specify diagnostic and germline input file"; +} +my ( @cover_D, @cover_diff); +for( my $i = 1; $i < 1000; $i++) { + $cover_D[$i] = 0; + $cover_diff[$i] = 0; +} +my($prefix, $path, $suffix) = fileparse($file_d); +my $diff_file = $prefix . ".somatic.cover"; +$diff_file = File::Spec->catfile($path, $diff_file); +open my $SOMATIC, ">$diff_file" or croak "can't open $diff_file:$OS_ERROR"; +open my $IN_D, "<$file_d" or croak "can't open $file_d:$OS_ERROR"; +open my $IN_G, "<$file_g" or croak "can't open $file_g:$OS_ERROR"; +my %g_sclip; +while( my $line = <$IN_G> ) { + chomp $line; + my ($chr, $pos, $ort, $cover) = split /\t/, $line; + $g_sclip{$chr} = {} if(!exists($g_sclip{$chr})); + $g_sclip{$chr}->{$pos} = $ort; +} +close($IN_G); +while( my $line = <$IN_D> ) { + chomp $line; + my ($chr, $pos, $ort, $cover) = split /\t/, $line; + $cover_D[$cover]++; + next if(exists ($g_sclip{$chr}->{$pos}) && $g_sclip{$chr}->{$pos} eq $ort); + $cover_diff[$cover]++; + print $SOMATIC $line, "\n"; +} +close($IN_D); +close $SOMATIC; +for( my $i = 1; $i < 1000; $i++) { + print join("\t", $i, $cover_D[$i], $cover_diff[$i]), "\n"; +} +