annotate varscan/varscan_somatic.pl @ 0:848f3dc54593 draft

Uploaded
author geert-vandeweyer
date Fri, 07 Mar 2014 06:17:32 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
2
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
3
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
4 use strict;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
5 use Cwd;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
6
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
7 die qq(
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
8 Bad numbr of inputs
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
9
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
10 ) if(!@ARGV);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
11
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
12 my $options ="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
13 my $normal="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
14 my $command="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
15 my $tumor="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
16 my $tumorbam = "";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
17 my $output="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
18 my $snp="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
19 my $indel="";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
20 my $working_dir = cwd();
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
21 my $log = '';
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
22
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
23 foreach my $input (@ARGV)
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
24 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
25 my @tmp = split "::", $input;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
26 if($tmp[0] eq "COMMAND")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
27 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
28 $command = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
29 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
30 elsif($tmp[0] eq "NORMAL")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
31 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
32 $normal = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
33 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
34 elsif($tmp[0] eq "TUMOR")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
35 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
36 $tumor = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
37 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
38 elsif($tmp[0] eq "TUMORBAM")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
39 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
40 $tumorbam = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
41 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
42 elsif($tmp[0] eq "OPTION")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
43 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
44 my @p = split(/\s+/,$tmp[1]);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
45 if ($p[0] =~ m/validation|strand-filter|output-vcf/ && $p[1] == 0) {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
46 next;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
47 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
48 $options = "$options ${tmp[1]}";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
49 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
50 elsif($tmp[0] eq "OUTPUT")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
51 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
52 $output = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
53 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
54 elsif($tmp[0] eq "SNP")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
55 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
56 $snp = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
57 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
58 elsif($tmp[0] eq "INDEL")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
59 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
60 $indel = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
61 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
62
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
63 elsif($tmp[0] eq "LOG")
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
64 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
65 $log = $tmp[1];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
66 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
67
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
68 else
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
69 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
70 die("Unknown Input: $input\n");
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
71 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
72 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
73
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
74 ## VCF OUTPUT
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
75 if ($output ne '') {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
76 system ("$command $normal $tumor $options --output-snp $working_dir/out.snp --output-indel $working_dir/out.indel 2>$log");
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
77 my $indels = "$working_dir/out.indel.vcf";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
78 my $snps = "$working_dir/out.snp.vcf";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
79
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
80 system("grep -v '^\#' $indels | grep -v '^chrom position' >> $snps");
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
81 my @chr_ord = chromosome_order($tumorbam);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
82
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
83 vs2vcf($snps, $output,\@chr_ord);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
84 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
85 ## SNP/INDEL OUTPUT
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
86 else {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
87 system ("$command $normal $tumor $options --output-snp $snp --output-indel $indel 2>$log");
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
88 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
89
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
90 sub vs2vcf
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
91 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
92
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
93 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
94 # G l o b a l v a r i a b l e s
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
95 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
96 my $version = '0.1';
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
97
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
98 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
99 # Read in file
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
100 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
101 my $input = shift;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
102 my $output = shift;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
103 my $chr_ord = shift;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
104 open(IN, $input) or die "Can't open $input': $!\n";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
105 open(OUT, ">$output") or die "Can't create $output': $!\n";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
106 my %output;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
107
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
108 while ( <IN> )
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
109 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
110 if ( /^#/ )
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
111 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
112 print OUT;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
113 next;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
114 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
115 chomp;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
116 my $line = $_;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
117
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
118 my @flds = split ( "\t", $line );
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
119 my $ref = $flds[3];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
120 my $alt = $flds[4];
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
121 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
122 # Deletion of bases
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
123 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
124 if ( $alt =~ /^\-/ )
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
125 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
126 ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
127 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
128
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
129 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
130 # Insertion of bases
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
131 #
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
132 if ( $alt =~ /^\+/ )
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
133 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
134 $flds[4] = $ref.substr($alt,1);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
135 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
136 ## insert dot for reference positions.
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
137 if ($flds[4] eq '') {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
138 $flds[4] = '.';
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
139 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
140 print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
141 $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
142 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
143 close(IN);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
144 # if chromosome order given return in sorted order
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
145 if(defined $chr_ord)
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
146 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
147 for my $chrom (@{ $chr_ord })
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
148 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
149 for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
150 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
151 print OUT $output{$chrom}{$pos};
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
152 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
153 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
154 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
155 close(OUT);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
156 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
157
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
158
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
159 sub chromosome_order
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
160 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
161 my $input = shift;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
162 # calculate flagstats
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
163 my $COMM = "samtools view -H $input | grep '^\@SQ'";
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
164 my @SQ = `$COMM`;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
165 chomp @SQ;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
166 for(my $i = 0; $i <= $#SQ; $i++)
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
167 {
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
168 $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/;
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
169 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
170 return(@SQ);
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
171 }
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
172
848f3dc54593 Uploaded
geert-vandeweyer
parents:
diff changeset
173