annotate call_miseq_variants @ 0:1a23ea467feb default tip

intial commit
author Yusuf Ali <ali@yusuf.email>
date Thu, 26 Mar 2015 09:36:17 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
1 #!/usr/bin/env perl
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
2
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
3 # Locked down procedure for variant calling
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
4 use strict;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
5 use warnings;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
6 use File::Basename;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
7 #@ARGV == 7 or die "Usage: $0 <sample name> <output dir> <deduped_realigned_input.bam> <target_regions.bed> <gene_regions.bed> <reference.fasta> <max num processes>\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
8
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
9 # parse configuration file
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
10 my $dirname= dirname(__FILE__);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
11 my $tool_data = shift @ARGV;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
12 my %config;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
13 if( not -e "$tool_data/miseq_bam_variants.loc" ){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
14 system("cp $dirname/tool-data/miseq_bam_variants.loc $tool_data/miseq_bam_variants.loc");
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
15 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
16 open CONFIG, '<',"$tool_data/miseq_bam_variants.loc";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
17 while(<CONFIG>){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
18 (my $key, my $value) = split(/\s+/, $_ );
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
19 $config{$key} = $value;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
20 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
21 my $dbs_dir = $config{"reference_dbs"};
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
22 my $name_dir = $config{"named_regions_dir"};
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
23 my $capt_dir = $config{"capture_kits_dir"};
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
24 close CONFIG;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
25
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
26 my $sample_name = shift @ARGV;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
27 my $outdir = shift @ARGV;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
28 my $bam = shift @ARGV;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
29 my $bed = $capt_dir . (shift @ARGV);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
30 my $gene_regions = $name_dir . (shift @ARGV);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
31 my $fasta = $dbs_dir . (shift @ARGV);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
32 my $max_processes = shift @ARGV;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
33
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
34 my $log = "$outdir/$sample_name.call_variants.log.txt";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
35 my $gatk = "$outdir/$sample_name.gatk_haplotypecaller";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
36 my $freeBayes = "$outdir/$sample_name.freeBayes";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
37 my $atlas2_indel = "$outdir/$sample_name.atlas2_indel";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
38 my $phase = "$outdir/$sample_name.phase";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
39 my $cover = "$outdir/$sample_name.read_coverage";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
40 my $flagstat = "$outdir/$sample_name.flagstat";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
41
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
42 my @cmds;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
43 @cmds = ( "$dirname/gatk_haplotypecaller_parallel $bam ".($max_processes > 5 ? int($max_processes*2/3+0.5) : 2)." $gatk.vcf -R $fasta --emitRefConfidence GVCF -variant_index_type LINEAR -variant_index_parameter 128000 &>> $log",
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
44 "$dirname/freebayes_parallel $bam ".($max_processes > 5 ? int(($max_processes-1)/3+0.5) : 2)." $freeBayes.vcf -j -C 2 -i -f $fasta -X &>> $log",
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
45 "samtools phase -F $bam > $phase.txt 2>> $log");
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
46 &run_cmds($max_processes, @cmds);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
47
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
48 @cmds = ("$dirname/vcf2hgvs_table -q -d 2 -p 0.05 -h 0.1 -u $dbs_dir/internal_runs.sorted.vcf.gz -m $dbs_dir/mappability/hg19.crgRefSeqExomeMappability75mer.txt -x $bed -i $gatk.vcf -o $gatk.hgvs.txt -s $dbs_dir/dbsnp_1000g_esp6500.latest.vcf.gz -r $dbs_dir/hg19.fa -e $dbs_dir/hg19_refGene_gencode_ultrasensitive.gtf -b $gene_regions -z $phase.txt",
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
49 "$dirname/vcf2hgvs_table -q -d 2 -p 0.05 -h 0.1 -u $dbs_dir/internal_runs.sorted.vcf.gz -m $dbs_dir/mappability/hg19.crgRefSeqExomeMappability75mer.txt -x $bed -i $freeBayes.vcf -o $freeBayes.hgvs.txt -s $dbs_dir/dbsnp_1000g_esp6500.latest.vcf.gz -r $dbs_dir/hg19.fa -e $dbs_dir/hg19_refGene_gencode_ultrasensitive.gtf -b $gene_regions -z $phase.txt");
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
50 &run_cmds($max_processes, @cmds);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
51
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
52 my $combined = "$outdir/$sample_name.combined";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
53 system "$dirname/combine_hgvs_tables -q true $combined.hgvs.txt GATKHaplotypeCaller $gatk.hgvs.txt FreeBayes $freeBayes.hgvs.txt >> $log";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
54 system "$dirname/hgvs_collapse_transcripts $combined.hgvs.txt $combined.collapsed.hgvs.txt 1 >> $log";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
55 system "$dirname/split_hgvs_by_confidence $combined.collapsed.hgvs.txt $combined.collapsed.confident.hgvs.txt $combined.collapsed.marginal.hgvs.txt 2 &>> $log";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
56
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
57 # Check the logs for anything untowards
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
58 # e.g. FreeBayes: jumping is disabled
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
59 # or any line saying "error"
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
60 my @errors;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
61 open(LOG, $log)
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
62 or die "Could not check log file for error message, therefore the validity of the genotypes cannot be guaranteed\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
63 while(<LOG>){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
64 if(/error/i or /jumping is disabled/){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
65 push @errors, $_;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
66 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
67 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
68 close(LOG);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
69
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
70 if(@errors){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
71 print "Errors were detected in the genotyping log:\n", @errors;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
72 exit 1; # failure
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
73 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
74 exit 0; #success
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
75
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
76 sub run_cmds{
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
77
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
78 my ($max_cmds, @cmd) = @_;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
79
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
80 my ($num_children, $pid);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
81
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
82 for($num_children = 1; $num_children < $max_cmds && @cmds; $num_children++){
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
83 # initialize the number of child processes at 1, and increment it by one
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
84 #while it is less than $max_cmds
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
85
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
86 my $cmd = shift (@cmds);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
87 if($pid = fork) {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
88 # do nothing if parent
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
89 } elsif(defined $pid) { # $pid is zero here if defined
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
90 #print STDERR $cmd, "\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
91 system $cmd;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
92 exit;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
93 } else {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
94 #weird fork error
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
95 die "Can't fork: $!\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
96 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
97 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
98
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
99 while(@cmds) {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
100 undef $pid;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
101 FORK: {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
102 my $cmd = shift (@cmds);
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
103 if($pid = fork) {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
104 # parent here
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
105 $num_children++;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
106 wait;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
107 $num_children--;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
108 next;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
109
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
110 } elsif(defined $pid) { # $pid is zero here if defined
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
111 #print STDERR $cmd, "\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
112 system $cmd;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
113 exit;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
114
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
115 } else {
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
116 #weird fork error
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
117 die "Can't fork: $!\n";
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
118 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
119 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
120 }
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
121 wait while $num_children--;
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
122 }