annotate prep_bams @ 0:6b673ffd9e38 default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:29:45 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
1 #!/usr/bin/env perl
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
2
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
3 use strict;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
4 use warnings;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
5 use File::Basename;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
6
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
7 # configuration file stuff
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
8 my %config;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
9 my $dirname = dirname(__FILE__);
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
10 my $tool_dir = shift @ARGV;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
11
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
12 if(not -e "$tool_dir/dedup_realign_bam.loc"){
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
13 system("cp $dirname/tool-data/dedup_realign_bam.loc $tool_dir/dedup_realign_bam.loc");
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
14 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
15 open CONFIG, '<', "$tool_dir/dedup_realign_bam.loc";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
16 while(<CONFIG>){
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
17 next if $_ =~ /^#/;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
18 (my $key, my $value) = split(/\s+/, $_);
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
19 $config{$key} = $value;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
20 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
21 close CONFIG;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
22
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
23 @ARGV > 4 or die "Usage: $0 <output log> <reference.fa> <temporary directory> <deduped_realigned.bam> <input1.bam> [input2.bam] ...\n";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
24
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
25 my $log = shift @ARGV;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
26 my $ref = $config{"dbs_dir"} . shift @ARGV;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
27 my $tmpdir = shift @ARGV;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
28 my $outfile = shift @ARGV;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
29 $SIG{__DIE__} = $SIG{INT} = $SIG{HUP} = $SIG{TERM} = $SIG{ABRT} = \&cleanup;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
30 if(@ARGV > 1){
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
31 system ("merge_bam_headers $log $tmpdir/tmp$$.headers.sam ".join(" ", @ARGV))>>8 and die "merge_bam_headers failed with exit status ", ($?>>8), ", please check the log\n";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
32 system("samtools merge -h $tmpdir/tmp$$.headers.sam $tmpdir/tmp$$.bam ".join(" ", @ARGV)." 2>> $log")>>8 and die "Samtools merge failed with exit status ", ($?>>8), ", please check the log\n";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
33 system("samtools flagstat $tmpdir/tmp$$.bam > $outfile.before_dedup.flagstat.txt");
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
34 system("samtools rmdup $tmpdir/tmp$$.bam $tmpdir/tmp$$.rmdup.bam 2>> $log")>>8 and die "Samtools rmdup failed with exit status ", ($?>>8), ", please check the log\n";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
35 unlink "$tmpdir/tmp$$.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
36 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
37 else{
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
38 system("samtools flagstat $ARGV[0] > $outfile.before_dedup.flagstat.txt");
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
39 system("samtools rmdup $ARGV[0] $tmpdir/tmp$$.rmdup.bam 2>> $log")>>8 and die "Samtools rmdup failed with exit status ", ($?>>8), ", please check the log\n";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
40 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
41 die "Samtools rmdup did not generate the expected output file" if not -e "$tmpdir/tmp$$.rmdup.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
42 die "Samtools generated a blank output file" if -z "$tmpdir/tmp$$.rmdup.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
43 system "samtools index $tmpdir/tmp$$.rmdup.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
44 system "java -Xmx4G -jar $dirname/GenomeAnalysisTK.jar -I $tmpdir/tmp$$.rmdup.bam -R $ref -T RealignerTargetCreator -o $tmpdir/tmp$$.rmdup.gatk_realigner.intervals 2>> $log";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
45 die "GATK did not generate the expected intervals file" if not -e "$tmpdir/tmp$$.rmdup.gatk_realigner.intervals";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
46 system "java -Xmx4G -jar $dirname/GenomeAnalysisTK.jar -I $tmpdir/tmp$$.rmdup.bam -R $ref -T IndelRealigner -targetIntervals $tmpdir/tmp$$.rmdup.gatk_realigner.intervals -o $outfile 2>> $log";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
47 die "GATK did not generate the expected realigned BAM output file" if not -e $outfile;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
48 die "GATK generated a blank output file" if -z $outfile;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
49 my $outfile_bai = $outfile;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
50 $outfile_bai =~ s/\.bam$/.bai/;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
51 if(not -e $outfile_bai){
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
52 if(not -e "$outfile.bai"){
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
53 system "samtools index $outfile";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
54 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
55 system "cp $outfile.bai $outfile_bai";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
56 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
57 else{
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
58 system "cp $outfile_bai $outfile.bai"; # some tools expect name.bai, some expect name.bam.bai, so provide both
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
59 }
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
60 &cleanup;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
61
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
62 sub cleanup{
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
63 print STDERR @_;
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
64 unlink "$tmpdir/$tmpdir/tmp$$.headers.sam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
65 unlink "$tmpdir/tmp$$.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
66 unlink "$tmpdir/tmp$$.rmdup.bam";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
67 unlink "$tmpdir/tmp$$.rmdup.bam.bai";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
68 unlink "$tmpdir/tmp$$.rmdup.gatk_realigner.intervals";
6b673ffd9e38 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
69 }