0
|
1 #!/usr/bin/perl
|
|
2
|
|
3
|
|
4 use strict;
|
|
5 use Cwd;
|
|
6
|
|
7 die qq(
|
|
8 Bad numbr of inputs
|
|
9
|
|
10 ) if(!@ARGV);
|
|
11
|
|
12 my $options ="";
|
|
13 my $command="";
|
|
14 my $input="";
|
|
15 #my $output="";
|
|
16 my $working_dir = cwd();
|
|
17 my $log = '';
|
|
18 my %outfiles;
|
|
19 foreach my $arg (@ARGV)
|
|
20 {
|
|
21 my @tmp = split "::", $arg;
|
|
22 if($tmp[0] eq "COMMAND")
|
|
23 {
|
|
24 $command = $tmp[1];
|
|
25 }
|
|
26 elsif($tmp[0] eq "INPUT")
|
|
27 {
|
|
28 $input = $tmp[1];
|
|
29 }
|
|
30 elsif($tmp[0] eq "OPTION")
|
|
31 {
|
|
32 my @p = split(/\s+/,$tmp[1]);
|
|
33 $options = "$options ${tmp[1]}";
|
|
34 }
|
|
35 elsif($tmp[0] eq "OUTPUT")
|
|
36 {
|
|
37 my @p = split(/\s+/,$tmp[1]);
|
|
38 $p[0] = substr($p[0],2);
|
|
39 $outfiles{$p[0]} = $p[1];
|
|
40 }
|
|
41
|
|
42 elsif($tmp[0] eq "LOG")
|
|
43 {
|
|
44 $log = $tmp[1];
|
|
45 }
|
|
46 else
|
|
47 {
|
|
48 die("Unknown Input: $input\n");
|
|
49 }
|
|
50 }
|
|
51 system("ln -s $input $working_dir/in.dat");
|
|
52
|
|
53
|
|
54
|
|
55 ## RUN
|
|
56 $command = "$command $working_dir/in.dat $options > $log 2>&1";
|
|
57 system($command);
|
|
58
|
|
59 ## if tabular files are kept, write them to galaxy-datafile
|
|
60 if ($outfiles{'loh'} ne 'None') {
|
|
61 ##
|
|
62 system("cp '$working_dir/in.dat.LOH' '".$outfiles{'loh'}."'");
|
|
63 system("cp '$working_dir/in.dat.LOH.hc' '".$outfiles{'loh'}."'");
|
|
64 system("cp '$working_dir/in.dat.Germline' '".$outfiles{'germ'}."'");
|
|
65 system("cp '$working_dir/in.dat.Germline.hc' '".$outfiles{'germ_hc'}."'");
|
|
66 system("cp '$working_dir/in.dat.Somatic' '".$outfiles{'som'}."'");
|
|
67 system("cp '$working_dir/in.dat.Somatic.hc' '".$outfiles{'som_hc'}."'");
|
|
68 }
|
|
69 ## if vcf files are kept, generate them, and write to galaxy-datafile
|
|
70 if ($outfiles{'loh_hc_vcf'} ne 'None') {
|
|
71 tab2vcf($working_dir,'in.dat.LOH.hc',$outfiles{'loh_hc_vcf'});
|
|
72 tab2vcf($working_dir,'in.dat.Germline.hc',$outfiles{'germ_hc_vcf'});
|
|
73 tab2vcf($working_dir,'in.dat.Somatic.hc',$outfiles{'som_hc_vcf'});
|
|
74 }
|
|
75 exit;
|
|
76
|
|
77 sub tab2vcf
|
|
78 {
|
|
79 my $wd = shift;
|
|
80 my $in = shift;
|
|
81 my $out = shift;
|
|
82 open OUT, ">$out";
|
|
83 print OUT "##fileformat=VCFv4.1\n";
|
|
84 print OUT "##source=processSomatic\n";
|
|
85 print OUT '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">'."\n";
|
|
86 print OUT '##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (Germline,Somatic,LOH)">'."\n";
|
|
87 print OUT '##INFO=<ID=VPV,Number=1,Type=Float,Description="Variant P-value">'."\n";
|
|
88 print OUT '##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic Variant P-value">'."\n";
|
|
89 print OUT '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'."\n";
|
|
90 print OUT '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">'."\n";
|
|
91 print OUT '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">'."\n";
|
|
92 print OUT '##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">'."\n";
|
|
93 print OUT '##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">'."\n";
|
|
94 print OUT '##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">'."\n";
|
|
95 print OUT '##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">'."\n";
|
|
96 print OUT '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR'."\n";
|
|
97 open IN, "$wd/$in";
|
|
98 my $head = <IN>;
|
|
99 while (<IN>) {
|
|
100 chomp;
|
|
101 my @p = split(/\t/,$_);
|
|
102 my $td = $p[4] + $p[5] + $p[8] + $p[9];
|
|
103 my $vpv = sprintf('%.3E',$p[13]);
|
|
104 my $spv = sprintf('%.3E',$p[14]);
|
|
105 my $info = "DP=$td;SS=$p[12];VPV=$vpv;SPV=$spv";
|
|
106 ## gt normal string
|
|
107 my $gtnormal = '';
|
|
108 if ("$p[7]" eq "$p[2]") {
|
|
109 $gtnormal = "0/0";
|
|
110 }
|
|
111 elsif ("$p[7]" eq "$p[3]") {
|
|
112 $gtnormal = "1/1";
|
|
113 }
|
|
114 else {
|
|
115 $gtnormal = "0/1";
|
|
116 }
|
|
117 my $nsum = $p[4] + $p[5];
|
|
118 $gtnormal .= ":.:$nsum:$p[4]:$p[5]:$p[6]:$p[19],$p[20],$p[21],$p[22]";
|
|
119 ## gt tumor string
|
|
120 my $gttumor = '';
|
|
121 if ("$p[11]" eq "$p[2]") {
|
|
122 $gttumor = "0/0";
|
|
123 }
|
|
124 elsif ("$p[11]" eq "$p[3]") {
|
|
125 $gttumor = "1/1";
|
|
126 }
|
|
127 else {
|
|
128 $gttumor = "0/1";
|
|
129 }
|
|
130 my $tsum = $p[8] + $p[9];
|
|
131 $gttumor .= ":.:$tsum:$p[8]:$p[9]:$p[10]:$p[15],$p[16],$p[17],$p[18]";
|
|
132
|
|
133 ## outline
|
|
134 my $line = "$p[0]\t$p[1]\t.\t$p[2]\t$p[3]\t.\tPASS\t$info\tGT:GQ:DP:RD:AD:FREQ:DP4\t$gtnormal\t$gttumor\n";
|
|
135 print OUT $line;
|
|
136 }
|
|
137 close IN;
|
|
138 close OUT;
|
|
139 }
|
|
140 sub vs2vcf
|
|
141 {
|
|
142
|
|
143 #
|
|
144 # G l o b a l v a r i a b l e s
|
|
145 #
|
|
146 my $version = '0.1';
|
|
147
|
|
148 #
|
|
149 # Read in file
|
|
150 #
|
|
151 my $input = shift;
|
|
152 my $output = shift;
|
|
153 my $chr_ord = shift;
|
|
154 open(IN, $input) or die "Can't open $input': $!\n";
|
|
155 open(OUT, ">$output") or die "Can't create $output': $!\n";
|
|
156 my %output;
|
|
157
|
|
158 while ( <IN> )
|
|
159 {
|
|
160 if ( /^#/ )
|
|
161 {
|
|
162 print OUT;
|
|
163 next;
|
|
164 }
|
|
165 chomp;
|
|
166 my $line = $_;
|
|
167
|
|
168 my @flds = split ( "\t", $line );
|
|
169 my $ref = $flds[3];
|
|
170 my $alt = $flds[4];
|
|
171 #
|
|
172 # Deletion of bases
|
|
173 #
|
|
174 if ( $alt =~ /^\-/ )
|
|
175 {
|
|
176 ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
|
|
177 }
|
|
178
|
|
179 #
|
|
180 # Insertion of bases
|
|
181 #
|
|
182 if ( $alt =~ /^\+/ )
|
|
183 {
|
|
184 $flds[4] = $ref.substr($alt,1);
|
|
185 }
|
|
186 ## insert dot for reference positions.
|
|
187 if ($flds[4] eq '') {
|
|
188 $flds[4] = '.';
|
|
189 }
|
|
190 print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
|
|
191 $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
|
|
192 }
|
|
193 close(IN);
|
|
194 # if chromosome order given return in sorted order
|
|
195 if(defined $chr_ord)
|
|
196 {
|
|
197 for my $chrom (@{ $chr_ord })
|
|
198 {
|
|
199 for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
|
|
200 {
|
|
201 print OUT $output{$chrom}{$pos};
|
|
202 }
|
|
203 }
|
|
204 }
|
|
205 close(OUT);
|
|
206 }
|
|
207
|
|
208
|
|
209 sub chromosome_order
|
|
210 {
|
|
211 my $input = shift;
|
|
212 # calculate flagstats
|
|
213 my $COMM = "samtools view -H $input | grep '^\@SQ'";
|
|
214 my @SQ = `$COMM`;
|
|
215 chomp @SQ;
|
|
216 for(my $i = 0; $i <= $#SQ; $i++)
|
|
217 {
|
|
218 $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/;
|
|
219 }
|
|
220 return(@SQ);
|
|
221 }
|
|
222
|
|
223
|