annotate varscan_somatic.pl @ 2:90c9b7a6e58b draft

Uploaded
author geert-vandeweyer
date Fri, 07 Mar 2014 06:18:29 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
2
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
3
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
4 use strict;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
5 use Cwd;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
6
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
7 die qq(
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
8 Bad numbr of inputs
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
9
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
10 ) if(!@ARGV);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
11
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
12 my $options ="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
13 my $normal="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
14 my $command="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
15 my $tumor="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
16 my $tumorbam = "";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
17 my $output="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
18 my $snp="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
19 my $indel="";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
20 my $working_dir = cwd();
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
21 my $log = '';
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
22
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
23 foreach my $input (@ARGV)
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
24 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
25 my @tmp = split "::", $input;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
26 if($tmp[0] eq "COMMAND")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
27 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
28 $command = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
29 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
30 elsif($tmp[0] eq "NORMAL")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
31 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
32 $normal = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
33 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
34 elsif($tmp[0] eq "TUMOR")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
35 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
36 $tumor = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
37 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
38 elsif($tmp[0] eq "TUMORBAM")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
39 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
40 $tumorbam = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
41 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
42 elsif($tmp[0] eq "OPTION")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
43 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
44 my @p = split(/\s+/,$tmp[1]);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
45 if ($p[0] =~ m/validation|strand-filter|output-vcf/ && $p[1] == 0) {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
46 next;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
47 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
48 $options = "$options ${tmp[1]}";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
49 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
50 elsif($tmp[0] eq "OUTPUT")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
51 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
52 $output = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
53 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
54 elsif($tmp[0] eq "SNP")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
55 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
56 $snp = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
57 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
58 elsif($tmp[0] eq "INDEL")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
59 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
60 $indel = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
61 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
62
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
63 elsif($tmp[0] eq "LOG")
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
64 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
65 $log = $tmp[1];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
66 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
67
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
68 else
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
69 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
70 die("Unknown Input: $input\n");
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
71 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
72 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
73
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
74 ## VCF OUTPUT
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
75 if ($output ne '') {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
76 system ("$command $normal $tumor $options --output-snp $working_dir/out.snp --output-indel $working_dir/out.indel 2>$log");
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
77 my $indels = "$working_dir/out.indel.vcf";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
78 my $snps = "$working_dir/out.snp.vcf";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
79
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
80 system("grep -v '^\#' $indels | grep -v '^chrom position' >> $snps");
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
81 my @chr_ord = chromosome_order($tumorbam);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
82
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
83 vs2vcf($snps, $output,\@chr_ord);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
84 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
85 ## SNP/INDEL OUTPUT
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
86 else {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
87 system ("$command $normal $tumor $options --output-snp $snp --output-indel $indel 2>$log");
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
88 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
89
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
90 sub vs2vcf
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
91 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
92
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
93 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
94 # G l o b a l v a r i a b l e s
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
95 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
96 my $version = '0.1';
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
97
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
98 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
99 # Read in file
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
100 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
101 my $input = shift;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
102 my $output = shift;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
103 my $chr_ord = shift;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
104 open(IN, $input) or die "Can't open $input': $!\n";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
105 open(OUT, ">$output") or die "Can't create $output': $!\n";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
106 my %output;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
107
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
108 while ( <IN> )
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
109 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
110 if ( /^#/ )
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
111 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
112 print OUT;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
113 next;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
114 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
115 chomp;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
116 my $line = $_;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
117
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
118 my @flds = split ( "\t", $line );
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
119 my $ref = $flds[3];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
120 my $alt = $flds[4];
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
121 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
122 # Deletion of bases
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
123 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
124 if ( $alt =~ /^\-/ )
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
125 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
126 ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
127 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
128
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
129 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
130 # Insertion of bases
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
131 #
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
132 if ( $alt =~ /^\+/ )
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
133 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
134 $flds[4] = $ref.substr($alt,1);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
135 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
136 ## insert dot for reference positions.
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
137 if ($flds[4] eq '') {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
138 $flds[4] = '.';
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
139 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
140 print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
141 $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
142 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
143 close(IN);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
144 # if chromosome order given return in sorted order
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
145 if(defined $chr_ord)
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
146 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
147 for my $chrom (@{ $chr_ord })
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
148 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
149 for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
150 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
151 print OUT $output{$chrom}{$pos};
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
152 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
153 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
154 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
155 close(OUT);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
156 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
157
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
158
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
159 sub chromosome_order
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
160 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
161 my $input = shift;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
162 # calculate flagstats
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
163 my $COMM = "samtools view -H $input | grep '^\@SQ'";
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
164 my @SQ = `$COMM`;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
165 chomp @SQ;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
166 for(my $i = 0; $i <= $#SQ; $i++)
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
167 {
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
168 $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/;
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
169 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
170 return(@SQ);
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
171 }
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
172
90c9b7a6e58b Uploaded
geert-vandeweyer
parents:
diff changeset
173