Mercurial > repos > geert-vandeweyer > varscan_wrapper
comparison varscan/varscan_processSomatic.pl @ 0:848f3dc54593 draft
Uploaded
author | geert-vandeweyer |
---|---|
date | Fri, 07 Mar 2014 06:17:32 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:848f3dc54593 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 | |
4 use strict; | |
5 use Cwd; | |
6 | |
7 die qq( | |
8 Bad numbr of inputs | |
9 | |
10 ) if(!@ARGV); | |
11 | |
12 my $options =""; | |
13 my $command=""; | |
14 my $input=""; | |
15 #my $output=""; | |
16 my $working_dir = cwd(); | |
17 my $log = ''; | |
18 my %outfiles; | |
19 foreach my $arg (@ARGV) | |
20 { | |
21 my @tmp = split "::", $arg; | |
22 if($tmp[0] eq "COMMAND") | |
23 { | |
24 $command = $tmp[1]; | |
25 } | |
26 elsif($tmp[0] eq "INPUT") | |
27 { | |
28 $input = $tmp[1]; | |
29 } | |
30 elsif($tmp[0] eq "OPTION") | |
31 { | |
32 my @p = split(/\s+/,$tmp[1]); | |
33 $options = "$options ${tmp[1]}"; | |
34 } | |
35 elsif($tmp[0] eq "OUTPUT") | |
36 { | |
37 my @p = split(/\s+/,$tmp[1]); | |
38 $p[0] = substr($p[0],2); | |
39 $outfiles{$p[0]} = $p[1]; | |
40 } | |
41 | |
42 elsif($tmp[0] eq "LOG") | |
43 { | |
44 $log = $tmp[1]; | |
45 } | |
46 else | |
47 { | |
48 die("Unknown Input: $input\n"); | |
49 } | |
50 } | |
51 system("ln -s $input $working_dir/in.dat"); | |
52 | |
53 | |
54 | |
55 ## RUN | |
56 $command = "$command $working_dir/in.dat $options > $log 2>&1"; | |
57 system($command); | |
58 | |
59 ## if tabular files are kept, write them to galaxy-datafile | |
60 if ($outfiles{'loh'} ne 'None') { | |
61 ## | |
62 system("cp '$working_dir/in.dat.LOH' '".$outfiles{'loh'}."'"); | |
63 system("cp '$working_dir/in.dat.LOH.hc' '".$outfiles{'loh'}."'"); | |
64 system("cp '$working_dir/in.dat.Germline' '".$outfiles{'germ'}."'"); | |
65 system("cp '$working_dir/in.dat.Germline.hc' '".$outfiles{'germ_hc'}."'"); | |
66 system("cp '$working_dir/in.dat.Somatic' '".$outfiles{'som'}."'"); | |
67 system("cp '$working_dir/in.dat.Somatic.hc' '".$outfiles{'som_hc'}."'"); | |
68 } | |
69 ## if vcf files are kept, generate them, and write to galaxy-datafile | |
70 if ($outfiles{'loh_hc_vcf'} ne 'None') { | |
71 tab2vcf($working_dir,'in.dat.LOH.hc',$outfiles{'loh_hc_vcf'}); | |
72 tab2vcf($working_dir,'in.dat.Germline.hc',$outfiles{'germ_hc_vcf'}); | |
73 tab2vcf($working_dir,'in.dat.Somatic.hc',$outfiles{'som_hc_vcf'}); | |
74 } | |
75 exit; | |
76 | |
77 sub tab2vcf | |
78 { | |
79 my $wd = shift; | |
80 my $in = shift; | |
81 my $out = shift; | |
82 open OUT, ">$out"; | |
83 print OUT "##fileformat=VCFv4.1\n"; | |
84 print OUT "##source=processSomatic\n"; | |
85 print OUT '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">'."\n"; | |
86 print OUT '##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (Germline,Somatic,LOH)">'."\n"; | |
87 print OUT '##INFO=<ID=VPV,Number=1,Type=Float,Description="Variant P-value">'."\n"; | |
88 print OUT '##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic Variant P-value">'."\n"; | |
89 print OUT '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'."\n"; | |
90 print OUT '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">'."\n"; | |
91 print OUT '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">'."\n"; | |
92 print OUT '##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">'."\n"; | |
93 print OUT '##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">'."\n"; | |
94 print OUT '##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">'."\n"; | |
95 print OUT '##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">'."\n"; | |
96 print OUT '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR'."\n"; | |
97 open IN, "$wd/$in"; | |
98 my $head = <IN>; | |
99 while (<IN>) { | |
100 chomp; | |
101 my @p = split(/\t/,$_); | |
102 my $td = $p[4] + $p[5] + $p[8] + $p[9]; | |
103 my $vpv = sprintf('%.3E',$p[13]); | |
104 my $spv = sprintf('%.3E',$p[14]); | |
105 my $info = "DP=$td;SS=$p[12];VPV=$vpv;SPV=$spv"; | |
106 ## gt normal string | |
107 my $gtnormal = ''; | |
108 if ("$p[7]" eq "$p[2]") { | |
109 $gtnormal = "0/0"; | |
110 } | |
111 elsif ("$p[7]" eq "$p[3]") { | |
112 $gtnormal = "1/1"; | |
113 } | |
114 else { | |
115 $gtnormal = "0/1"; | |
116 } | |
117 my $nsum = $p[4] + $p[5]; | |
118 $gtnormal .= ":.:$nsum:$p[4]:$p[5]:$p[6]:$p[19],$p[20],$p[21],$p[22]"; | |
119 ## gt tumor string | |
120 my $gttumor = ''; | |
121 if ("$p[11]" eq "$p[2]") { | |
122 $gttumor = "0/0"; | |
123 } | |
124 elsif ("$p[11]" eq "$p[3]") { | |
125 $gttumor = "1/1"; | |
126 } | |
127 else { | |
128 $gttumor = "0/1"; | |
129 } | |
130 my $tsum = $p[8] + $p[9]; | |
131 $gttumor .= ":.:$tsum:$p[8]:$p[9]:$p[10]:$p[15],$p[16],$p[17],$p[18]"; | |
132 | |
133 ## outline | |
134 my $line = "$p[0]\t$p[1]\t.\t$p[2]\t$p[3]\t.\tPASS\t$info\tGT:GQ:DP:RD:AD:FREQ:DP4\t$gtnormal\t$gttumor\n"; | |
135 print OUT $line; | |
136 } | |
137 close IN; | |
138 close OUT; | |
139 } | |
140 sub vs2vcf | |
141 { | |
142 | |
143 # | |
144 # G l o b a l v a r i a b l e s | |
145 # | |
146 my $version = '0.1'; | |
147 | |
148 # | |
149 # Read in file | |
150 # | |
151 my $input = shift; | |
152 my $output = shift; | |
153 my $chr_ord = shift; | |
154 open(IN, $input) or die "Can't open $input': $!\n"; | |
155 open(OUT, ">$output") or die "Can't create $output': $!\n"; | |
156 my %output; | |
157 | |
158 while ( <IN> ) | |
159 { | |
160 if ( /^#/ ) | |
161 { | |
162 print OUT; | |
163 next; | |
164 } | |
165 chomp; | |
166 my $line = $_; | |
167 | |
168 my @flds = split ( "\t", $line ); | |
169 my $ref = $flds[3]; | |
170 my $alt = $flds[4]; | |
171 # | |
172 # Deletion of bases | |
173 # | |
174 if ( $alt =~ /^\-/ ) | |
175 { | |
176 ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref); | |
177 } | |
178 | |
179 # | |
180 # Insertion of bases | |
181 # | |
182 if ( $alt =~ /^\+/ ) | |
183 { | |
184 $flds[4] = $ref.substr($alt,1); | |
185 } | |
186 ## insert dot for reference positions. | |
187 if ($flds[4] eq '') { | |
188 $flds[4] = '.'; | |
189 } | |
190 print OUT join( "\t", @flds),"\n" unless defined $chr_ord; | |
191 $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord; | |
192 } | |
193 close(IN); | |
194 # if chromosome order given return in sorted order | |
195 if(defined $chr_ord) | |
196 { | |
197 for my $chrom (@{ $chr_ord }) | |
198 { | |
199 for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} }) | |
200 { | |
201 print OUT $output{$chrom}{$pos}; | |
202 } | |
203 } | |
204 } | |
205 close(OUT); | |
206 } | |
207 | |
208 | |
209 sub chromosome_order | |
210 { | |
211 my $input = shift; | |
212 # calculate flagstats | |
213 my $COMM = "samtools view -H $input | grep '^\@SQ'"; | |
214 my @SQ = `$COMM`; | |
215 chomp @SQ; | |
216 for(my $i = 0; $i <= $#SQ; $i++) | |
217 { | |
218 $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/; | |
219 } | |
220 return(@SQ); | |
221 } | |
222 | |
223 |