Mercurial > repos > rdaveau > gfap
comparison gfapts/gfap_r1.0_allvar_genomic_annotater.pl @ 0:f753b30013e6 draft
Uploaded
author | rdaveau |
---|---|
date | Fri, 29 Jun 2012 10:20:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f753b30013e6 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use warnings FATAL => qw[ numeric uninitialized ]; | |
5 use File::Basename; | |
6 use Getopt::Long; | |
7 | |
8 sub sepind{ | |
9 $_=shift @_ foreach my($str, $sep); | |
10 my($pos, @pos); | |
11 $pos=0; | |
12 while(1){ | |
13 $pos=index($str, $sep, $pos); | |
14 last if($pos<0); | |
15 push @pos, $pos++; | |
16 } | |
17 return \@pos; | |
18 } | |
19 | |
20 my($varfile, $buildver, $refseq_dir, $cosmic_dir, $refseq_release, $cosmic_release, $annovar_release, $outdir, $noncoding, $coding, $cos, $ogs, $mid, $pid, $cno, $pno); | |
21 my(@buffer, @header, @ogs, @mid, @cno, @pno, @sep, %buffer, %mid, %ogs, %opts); | |
22 | |
23 GetOptions(\%opts, "varfile=s", "buildver=s", "refseq_dir=s", "refseq_release=s", "cosmic_dir=s", "cosmic_release=s", "annovar_release=s", "outdir=s", "noncoding=s", "coding=s"); | |
24 $varfile = $opts{varfile}; | |
25 $buildver = $opts{buildver}; | |
26 $refseq_dir = $opts{refseq_dir}; | |
27 $refseq_release = $opts{refseq_release}; | |
28 $cosmic_dir = $opts{cosmic_dir}; | |
29 $cosmic_release = $opts{cosmic_release}; | |
30 $annovar_release = $opts{annovar_release}; | |
31 $outdir = $opts{outdir}; | |
32 $noncoding = $opts{noncoding}; | |
33 $coding = $opts{coding}; | |
34 | |
35 my %legend=( | |
36 'unk' => 'undefined column', | |
37 'chr' => 'chromosome identifier', | |
38 'start' => "${buildver} 1-based start position", | |
39 'end' => "${buildver} 1-based end position", | |
40 'ref' => 'reference allele', | |
41 'alt' => 'alternate allele', | |
42 'annot' => 'ig:intergenic; pp:1kb-upstream; 5|3u:UTR; in:intronic; ss:splice; nc:ncRNA', | |
43 'ogs' => 'official gene symbol(s)', | |
44 'cos' => "gene listed in cosmic ${cosmic_release} release", | |
45 'mid' => "RefSeq mRNA identifier(s) from human.protein.gpff ${refseq_release} release", | |
46 'pid' => "RefSeq protein identifier(s) from human.protein.gpff ${refseq_release} release", | |
47 'c.x' => 'ATG-based variant descriptor in mRNA', | |
48 'p.x' => 'ATG-based variant descriptor in protein' | |
49 ); | |
50 | |
51 my $annovar_src_dir = 'inc/annovar'; | |
52 my $annovar_db_dir = "db/annovar/${annovar_release}"; | |
53 | |
54 my $fname = readlink($varfile) || $varfile; | |
55 $fname = basename($fname); | |
56 | |
57 `${annovar_src_dir}/annotate_variation.pl -buildver $buildver ${outdir}/${fname} $annovar_db_dir 2> /dev/null` and die $!; | |
58 | |
59 open IN, "<${refseq_dir}/mid2pid_${refseq_release}.txt" or die $!; | |
60 while(<IN>){ | |
61 next if /^#/; | |
62 /^(\S+)\s+(\S+)/; | |
63 $mid{$1}=$2; | |
64 } | |
65 close IN; | |
66 | |
67 open IN, "<${cosmic_dir}/${buildver}_cosmic_ogs_${cosmic_release}.txt" or die $!; | |
68 chomp and $ogs{$_}++ while(<IN>); | |
69 close IN; | |
70 | |
71 open IN, "<${outdir}/${fname}" or die $!; | |
72 while(<IN>){ | |
73 last if $_!~/^#/; | |
74 last if $_!~/=/; | |
75 chomp; | |
76 /^#(\S+)\s{1}=\s{1}(.+)/; | |
77 push @header, $1; | |
78 $legend{$1}=$2; | |
79 } | |
80 if(!scalar(@header)){ | |
81 @header=('chr', 'start', 'end', 'ref', 'alt'); | |
82 $_=readline(IN); | |
83 @_=split /\t/, $_; | |
84 $_=$#_-4; | |
85 push @header, ('unk')x$_ if($_!=0); | |
86 } | |
87 close IN; | |
88 push @header, ('annot', 'ogs', 'cos'); | |
89 open OUT, ">${outdir}/${fname}.nc" or die $!; | |
90 print OUT "#", join(' = ', $_, $legend{$_}), "\n" foreach @header; | |
91 print OUT "#", join("\t", @header), "\n"; | |
92 open IN, "<${outdir}/${fname}.variant_function" or die $!; | |
93 while(<IN>){ | |
94 next if /exonic/; | |
95 s/^downstream/ig/; | |
96 s/;downstream//; | |
97 s/,/:/g; | |
98 s/(UTR(3|5))|(upstream)|(intronic)|(splicing)|(ncRNA)|(intergenic)/$1?"${2}u":$3?'pp':$4?'in':$5?'ss':$6?'nc':'ig'/eg; | |
99 chomp; | |
100 @buffer=split /\t/, $_; | |
101 $buffer[1]='na' if $buffer[0] eq 'ig'; | |
102 $buffer[1]=~s/([^;]+);(?:\S+)$/$1/ if $buffer[0]!~/;/; | |
103 print OUT join("\t", @buffer[2..$#buffer, 0..1], ($buffer[1] eq 'na')?'na':(exists $ogs{$buffer[1]})?'TRUE':'FALSE'), "\n"; | |
104 } | |
105 close IN; | |
106 close OUT; | |
107 | |
108 $legend{annot}='fd:frameshift deletion; fi:frameshift insertion; nd:nonframeshift deletion; ni:nonframeshift insertion; bs:block substitution; ss:synonymous SNV; ns:nonsynonymous SNV; sg:stopgain; sl:stoploss; na:unknown'; | |
109 push @header, ('mid', 'pid', 'c.x', 'p.x'); | |
110 | |
111 open IN, "${outdir}/${fname}.exonic_variant_function" or die $!; | |
112 open OUT, ">${outdir}/${fname}.cds" or die $!; | |
113 print OUT "#", join(' = ', $_, $legend{$_}), "\n" foreach @header; | |
114 print OUT "#", join("\t", @header), "\n"; | |
115 while(<IN>){ | |
116 next if /unknown/; | |
117 s/^\S+\s+//; | |
118 chomp; | |
119 %buffer=(); | |
120 @{$_}=() foreach (\@ogs, \@mid, \@cno, \@pno, \@sep); | |
121 @buffer=split /\t/, $_; | |
122 $buffer[0]=~s/(nonf\w+\s{1}(d|i|s)\w+)|(\w+\s{1}(d|i)\w+)|(stop(\w){1}.+)|(^(n|s).+)|(.+)/$1?(($2 eq 's')?'b':'n').$2:$3?"f$4":$5?"s$6":$7?"${8}s":'na'/eg; | |
123 foreach (split /,/, $buffer[1]){ | |
124 @_=split /:/, $_; | |
125 splice(@_, 2, 1); | |
126 $_=shift(@_) || 'na' foreach ($ogs, $mid, $cno, $pno); | |
127 $buffer{ogs}->{$ogs}->{$cno}->{$mid}++; | |
128 $buffer{ono}->{$cno}=$pno; | |
129 } | |
130 $cos=0; | |
131 foreach $ogs (@ogs=keys %{$buffer{ogs}}){ | |
132 push @cno, join('|', (@_=keys %{$buffer{ogs}->{$ogs}})); | |
133 unshift @pno, $buffer{ono}->{$_} foreach reverse @_; | |
134 $pno=join('|', @pno[0..$#_]); | |
135 splice(@pno, 0, $#_+1); | |
136 push @pno, $pno; | |
137 foreach $cno (@_){ | |
138 push @mid, join(':', keys %{$buffer{ogs}->{$ogs}->{$cno}}); | |
139 } | |
140 $cos++ if exists $ogs{$ogs}; | |
141 } | |
142 $mid=join('|', @mid); | |
143 $cno=join(';', @cno); | |
144 if($#ogs!=0){ | |
145 (my $sep=$cno)=~s/[^;\|:]+//g; | |
146 @_=@{sepind($mid, '|')}[@{sepind($sep, ';')}]; | |
147 substr($mid, $_, 1)=';' foreach @_; | |
148 } | |
149 ($pid=$mid)=~s/([^;\|:]+)/$mid{$1} || 'na'/eg; | |
150 push @buffer, shift @buffer, join(';', @ogs), ($cos!=0)?'TRUE':'FALSE', $mid, $pid, $cno, join(';', @pno); | |
151 shift @buffer; | |
152 print OUT join("\t", @buffer), "\n"; | |
153 } | |
154 close IN; | |
155 close OUT; | |
156 system "rm $noncoding $coding ${outdir}/${fname}*variant_function ${outdir}/${fname}*invalid*; ln -s ${outdir}/${fname}.nc $noncoding; ln -s ${outdir}/${fname}.cds $coding" and die $!; |