Mercurial > repos > yusuf > dedup_realign_bam
diff prep_bams @ 0:6b673ffd9e38 default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:29:45 -0600 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prep_bams Wed Mar 25 13:29:45 2015 -0600 @@ -0,0 +1,69 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use File::Basename; + +# configuration file stuff +my %config; +my $dirname = dirname(__FILE__); +my $tool_dir = shift @ARGV; + +if(not -e "$tool_dir/dedup_realign_bam.loc"){ + system("cp $dirname/tool-data/dedup_realign_bam.loc $tool_dir/dedup_realign_bam.loc"); +} +open CONFIG, '<', "$tool_dir/dedup_realign_bam.loc"; +while(<CONFIG>){ + next if $_ =~ /^#/; + (my $key, my $value) = split(/\s+/, $_); + $config{$key} = $value; +} +close CONFIG; + +@ARGV > 4 or die "Usage: $0 <output log> <reference.fa> <temporary directory> <deduped_realigned.bam> <input1.bam> [input2.bam] ...\n"; + +my $log = shift @ARGV; +my $ref = $config{"dbs_dir"} . shift @ARGV; +my $tmpdir = shift @ARGV; +my $outfile = shift @ARGV; +$SIG{__DIE__} = $SIG{INT} = $SIG{HUP} = $SIG{TERM} = $SIG{ABRT} = \&cleanup; +if(@ARGV > 1){ + system ("merge_bam_headers $log $tmpdir/tmp$$.headers.sam ".join(" ", @ARGV))>>8 and die "merge_bam_headers failed with exit status ", ($?>>8), ", please check the log\n"; + system("samtools merge -h $tmpdir/tmp$$.headers.sam $tmpdir/tmp$$.bam ".join(" ", @ARGV)." 2>> $log")>>8 and die "Samtools merge failed with exit status ", ($?>>8), ", please check the log\n"; + system("samtools flagstat $tmpdir/tmp$$.bam > $outfile.before_dedup.flagstat.txt"); + system("samtools rmdup $tmpdir/tmp$$.bam $tmpdir/tmp$$.rmdup.bam 2>> $log")>>8 and die "Samtools rmdup failed with exit status ", ($?>>8), ", please check the log\n"; + unlink "$tmpdir/tmp$$.bam"; +} +else{ + system("samtools flagstat $ARGV[0] > $outfile.before_dedup.flagstat.txt"); + system("samtools rmdup $ARGV[0] $tmpdir/tmp$$.rmdup.bam 2>> $log")>>8 and die "Samtools rmdup failed with exit status ", ($?>>8), ", please check the log\n"; +} +die "Samtools rmdup did not generate the expected output file" if not -e "$tmpdir/tmp$$.rmdup.bam"; +die "Samtools generated a blank output file" if -z "$tmpdir/tmp$$.rmdup.bam"; +system "samtools index $tmpdir/tmp$$.rmdup.bam"; +system "java -Xmx4G -jar $dirname/GenomeAnalysisTK.jar -I $tmpdir/tmp$$.rmdup.bam -R $ref -T RealignerTargetCreator -o $tmpdir/tmp$$.rmdup.gatk_realigner.intervals 2>> $log"; +die "GATK did not generate the expected intervals file" if not -e "$tmpdir/tmp$$.rmdup.gatk_realigner.intervals"; +system "java -Xmx4G -jar $dirname/GenomeAnalysisTK.jar -I $tmpdir/tmp$$.rmdup.bam -R $ref -T IndelRealigner -targetIntervals $tmpdir/tmp$$.rmdup.gatk_realigner.intervals -o $outfile 2>> $log"; +die "GATK did not generate the expected realigned BAM output file" if not -e $outfile; +die "GATK generated a blank output file" if -z $outfile; +my $outfile_bai = $outfile; +$outfile_bai =~ s/\.bam$/.bai/; +if(not -e $outfile_bai){ + if(not -e "$outfile.bai"){ + system "samtools index $outfile"; + } + system "cp $outfile.bai $outfile_bai"; +} +else{ + system "cp $outfile_bai $outfile.bai"; # some tools expect name.bai, some expect name.bam.bai, so provide both +} +&cleanup; + +sub cleanup{ + print STDERR @_; + unlink "$tmpdir/$tmpdir/tmp$$.headers.sam"; + unlink "$tmpdir/tmp$$.bam"; + unlink "$tmpdir/tmp$$.rmdup.bam"; + unlink "$tmpdir/tmp$$.rmdup.bam.bai"; + unlink "$tmpdir/tmp$$.rmdup.gatk_realigner.intervals"; +}