# HG changeset patch # User rdaveau # Date 1343987441 14400 # Node ID 028f435b6cfb94085e5271a566646a49bff2c263 # Parent f753b30013e686d20b4cf3fdb53129a587f948e5 Uploaded diff -r f753b30013e6 -r 028f435b6cfb gfapts/README diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_allvar_genomic_annotater.xml diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_cdsvar_functional_annotater.xml diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_known_var_finder.pl --- a/gfapts/gfap_r1.0_known_var_finder.pl Fri Jun 29 10:20:55 2012 -0400 +++ b/gfapts/gfap_r1.0_known_var_finder.pl Fri Aug 03 05:50:41 2012 -0400 @@ -1,12 +1,91 @@ #!/usr/bin/perl use strict; -use lib 'inc/perlmod'; -use ngsutil qw[ :DEFAULT &varscan ]; +#use lib 'inc/perlmod'; +#use ngsutil qw[ :DEFAULT &varscan ]; use warnings FATAL => qw[ numeric uninitialized ]; use File::Basename; use Getopt::Long; +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# TEMP include ngsutil.pm +sub explode_varcall{ + my $N=0; + $_=shift @_ foreach my($POS, $REF, $ALT); + $_=$POS foreach my($START, $END); + my(@length, @range, @idx, @VAR, @POS); + @{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS); + push @length, length($_) foreach ($REF, $ALT); + @range=sort{ $a<=>$b } @length; + if($range[0]==1){ + if($range[1]!=1){ + foreach ($REF, $ALT){ + $_=substr($_, 1); + $_=~s/^$/-/; + } + if($length[0]!=1){ + $END+=$length[0]-1; + $START++; + } + } + push @POS, $START, $END; + push @VAR, $REF, $ALT; + }else{ + my @N=(); + undef $_ foreach my ($i, $VAR); + $_-=2 foreach (@length, @range); + $_++ foreach ($START, $END); + $_=substr($_, 1) foreach ($REF, $ALT); + my $indel='-' x ($range[1]-$range[0]); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))? + 0:1) for 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($length[0]<$length[1]){ + @VAR=($VAR); + @N=($N); + $N=0; + undef($VAR); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))? + 0:1) for reverse 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); } + else{ $REF=$indel . $REF; } + }else{ $ALT.=$indel; } + foreach (qw[ 0 \- ]){ + push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g); + } + @{$_}=() foreach (\@VAR, \@POS); + foreach my $k (@idx){ + push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT); + push @POS, ${$k}[0], sum(@{$k})-1; + } + $_+=$START foreach @POS; + $_=~s/\-+/\-/ foreach @VAR; + for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; } + } + return(\@POS, \@VAR); + } + +sub varscan{ + $_=shift @_ foreach my($kname, $fpath, $href); + my($k, @buffer); + open IN, "<$fpath" or die $!; + while(){ + next if /^#/; + chomp; + @buffer=split /\s+/, $_; + next if !exists $$href{($k=join(':', @buffer[0..2]))}; + next if $$href{$k}->{ref} !~ $buffer[3]; + next if $$href{$k}->{alt} !~ $buffer[4]; + splice(@buffer, 0, 5); + $$href{$k}->{$kname}=join(':', @buffer); + } + close IN; + } +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist); GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s"); diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_known_var_finder.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gfapts/gfap_r1.0_known_var_finder.pl~ Fri Aug 03 05:50:41 2012 -0400 @@ -0,0 +1,177 @@ +#!/usr/bin/perl + +use strict; +#use lib 'inc/perlmod'; +#use ngsutil qw[ :DEFAULT &varscan ]; +use warnings FATAL => qw[ numeric uninitialized ]; +use File::Basename; +use Getopt::Long; + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# ngsutil.pm +sub explode_varcall{ + my $N=0; + $_=shift @_ foreach my($POS, $REF, $ALT); + $_=$POS foreach my($START, $END); + my(@length, @range, @idx, @VAR, @POS); + @{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS); + push @length, length($_) foreach ($REF, $ALT); + @range=sort{ $a<=>$b } @length; + if($range[0]==1){ + if($range[1]!=1){ + foreach ($REF, $ALT){ + $_=substr($_, 1); + $_=~s/^$/-/; + } + if($length[0]!=1){ + $END+=$length[0]-1; + $START++; + } + } + push @POS, $START, $END; + push @VAR, $REF, $ALT; + }else{ + my @N=(); + undef $_ foreach my ($i, $VAR); + $_-=2 foreach (@length, @range); + $_++ foreach ($START, $END); + $_=substr($_, 1) foreach ($REF, $ALT); + my $indel='-' x ($range[1]-$range[0]); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))? + 0:1) for 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($length[0]<$length[1]){ + @VAR=($VAR); + @N=($N); + $N=0; + undef($VAR); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))? + 0:1) for reverse 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); } + else{ $REF=$indel . $REF; } + }else{ $ALT.=$indel; } + foreach (qw[ 0 \- ]){ + push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g); + } + @{$_}=() foreach (\@VAR, \@POS); + foreach my $k (@idx){ + push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT); + push @POS, ${$k}[0], sum(@{$k})-1; + } + $_+=$START foreach @POS; + $_=~s/\-+/\-/ foreach @VAR; + for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; } + } + return(\@POS, \@VAR); + } + +sub varscan{ + $_=shift @_ foreach my($kname, $fpath, $href); + my($k, @buffer); + open IN, "<$fpath" or die $!; + while(){ + next if /^#/; + chomp; + @buffer=split /\s+/, $_; + next if !exists $$href{($k=join(':', @buffer[0..2]))}; + next if $$href{$k}->{ref} !~ $buffer[3]; + next if $$href{$k}->{alt} !~ $buffer[4]; + splice(@buffer, 0, 5); + $$href{$k}->{$kname}=join(':', @buffer); + } + close IN; + } +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist); + +GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s"); +$varfile = $opts{varfile}; +$buildver = $opts{buildver}; +$outdir = $opts{outdir}; +$dir_1000g = $opts{dir_1000g}; +$dir_dbsnp = $opts{dir_dbsnp}; +$dir_cosmic = $opts{dir_cosmic}; +$release_1000g = $opts{release_1000g}; +$release_dbsnp = $opts{release_dbsnp}; +$release_cosmic = $opts{release_cosmic}; +$outfile = $opts{outfile}; + +my $fname = readlink($varfile) || $varfile; +$fname = basename($fname); + +my %k=( + '1000g' => { + 'dir' => $dir_1000g, 'release' => $release_1000g, 'value' => join(':', ('0.00000')x5), 'header' => join(':', 'AF_ALL', 'AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR') + }, 'dbsnp' => { + 'dir' => $dir_dbsnp, 'release' => $release_dbsnp, 'value' => join(':', ('na')x2), 'header' => join(':', 'rs', 'dbsnp') + }, 'cosmic_var' => { + 'dir' => $dir_cosmic, 'release' => $release_cosmic, 'value' => join(':', '0.00000', 'na'), 'header' => join(':', 'AF_COS', 'cid') + } +); + +my %legend=( + 'chr' => 'chromosome identifier', + 'start' => "${buildver} 1-based start position", + 'end' => "${buildver} 1-based end position", + 'ref' => 'reference allele', + 'alt' => 'alternate allele', + 'QC' => 'Phred-scaled call quality', + 'NRF' => '#reads consistent w/ the reference allele on the F-strand', + 'NRR' => '#reads consistent w/ the reference allele on the R-strand', + 'NAF' => '#reads consistent w/ the alternate allele on the F-strand', + 'NAR' => '#reads consistent w/ the alternate allele on the R-strand', + 'DP' => 'total #reads in call ie. NRF+NRR+NAF+NAR', + 'AD' => 'total #reads consistent w/ the alternate allele ie. NAF+NAR', + 'AF' => 'alternate allele ratio ie. AD/DP', + 'VCF.FILTER' => 'FILTER field from the input vcf file', + 'DPT.FILTER' => 'check for heterogeneous depth in substituted blocks', + 'VAR.FILTER' => 'GFAP default FILTER to discriminate between TP and FP variants', + 'P.str' => 'NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias', + 'P.ref' => 'NRF vs. NRR binomial test P-value ie. reference allele strand bias', + 'P.alt' => 'NAF vs. NAR binomial test P-value ie. alternate allele strand bias', + 'AF_ALL' => "global AF in ${release_1000g} 1000g data", + 'AF_AFR' => "AF in AFR ${release_1000g} 1000g data", + 'AF_AMR' => "AF in AMR ${release_1000g} 1000g data", + 'AF_ASN' => "AF in ASN ${release_1000g} 1000g data", + 'AF_EUR' => "AF in EUR ${release_1000g} 1000g data", + 'AF_COS' => "AF in ${release_cosmic} cosmic data", + 'rs' => "dbsnp rs identifier(s) from ${release_dbsnp} release", + 'dbsnp' => "dbsnp build version(s) from ${release_dbsnp} release", + 'cid' => "cosmic mutation identifier from ${release_cosmic} release" +); +my @header=('chr', 'start', 'end', 'ref', 'alt', 'DPT.FILTER', 'QC', 'NRF', 'NRR', 'NAF', 'NAR', 'VCF.FILTER', 'P.str', 'P.ref', 'P.alt', 'DP', 'AD', 'AF', 'VAR.FILTER'); +my @k=qw[ 1000g dbsnp cosmic_var ]; + +open IN, "<$varfile" or die $!; +while(){ + chomp; + @buffer=split /\s+/, $_; + $buffer[0]=~s/^chr(.+)$/$1/; + push @varlist, ($k=join(':', @buffer[0..2])); + shift(@buffer) for 0..2; + $varlist{$k}->{$_}=shift(@buffer) foreach qw[ ref alt ]; + $varlist{$k}->{cov}=join(':', (($buffer[0] eq 'unk')?'SKIP':'PASS'), @buffer[1..$#buffer]); + } +close IN; + +foreach $k (@k){ + push @header, split(/:/, $k{$k}->{header}); + varscan($k, $k{$k}->{file}, \%varlist); + } + +my @idx=(0..4,7..10,15..17,6,12..14,11,5,18..23,26..27,24..25); +open OUT, ">${outdir}/${fname}.dbi" or die $!; +print OUT '#', join(' = ', $_, $legend{$_}), "\n" foreach @header[@idx]; +print OUT '#', join("\t", @header[@idx]), "\n"; +foreach $k (@varlist){ + @buffer=(split(/:/, 'chr'.$k), $varlist{$k}->{ref}, $varlist{$k}->{alt}); + push @buffer, split(/:/, ($varlist{$k}->{$_} || $k{$_}->{value})) foreach ('cov', @k); + print OUT join("\t", @buffer[@idx]), "\n"; + } +close OUT; + +system "rm $outfile; ln -s ${outdir}/${fname}.dbi $outfile" and die $!; \ No newline at end of file diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_known_var_finder.xml diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_samvcf_data_parser.pl --- a/gfapts/gfap_r1.0_samvcf_data_parser.pl Fri Jun 29 10:20:55 2012 -0400 +++ b/gfapts/gfap_r1.0_samvcf_data_parser.pl Fri Aug 03 05:50:41 2012 -0400 @@ -1,8 +1,8 @@ #!/usr/bin/perl use strict; -use lib 'inc/perlmod'; -use ngsutil qw[ :DEFAULT &explode_varcall ]; +# use lib 'inc/perlmod'; +# use ngsutil qw[ :DEFAULT &explode_varcall ]; use warnings FATAL => qw[ numeric uninitialized ]; use List::Util qw[ sum min max ]; use File::Basename; @@ -13,6 +13,85 @@ my $rbin = '/usr/bin/R'; #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# TEMP include ngsutil.pm +sub explode_varcall{ + my $N=0; + $_=shift @_ foreach my($POS, $REF, $ALT); + $_=$POS foreach my($START, $END); + my(@length, @range, @idx, @VAR, @POS); + @{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS); + push @length, length($_) foreach ($REF, $ALT); + @range=sort{ $a<=>$b } @length; + if($range[0]==1){ + if($range[1]!=1){ + foreach ($REF, $ALT){ + $_=substr($_, 1); + $_=~s/^$/-/; + } + if($length[0]!=1){ + $END+=$length[0]-1; + $START++; + } + } + push @POS, $START, $END; + push @VAR, $REF, $ALT; + }else{ + my @N=(); + undef $_ foreach my ($i, $VAR); + $_-=2 foreach (@length, @range); + $_++ foreach ($START, $END); + $_=substr($_, 1) foreach ($REF, $ALT); + my $indel='-' x ($range[1]-$range[0]); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))? + 0:1) for 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($length[0]<$length[1]){ + @VAR=($VAR); + @N=($N); + $N=0; + undef($VAR); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))? + 0:1) for reverse 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); } + else{ $REF=$indel . $REF; } + }else{ $ALT.=$indel; } + foreach (qw[ 0 \- ]){ + push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g); + } + @{$_}=() foreach (\@VAR, \@POS); + foreach my $k (@idx){ + push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT); + push @POS, ${$k}[0], sum(@{$k})-1; + } + $_+=$START foreach @POS; + $_=~s/\-+/\-/ foreach @VAR; + for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; } + } + return(\@POS, \@VAR); + } + +sub varscan{ + $_=shift @_ foreach my($kname, $fpath, $href); + my($k, @buffer); + open IN, "<$fpath" or die $!; + while(){ + next if /^#/; + chomp; + @buffer=split /\s+/, $_; + next if !exists $$href{($k=join(':', @buffer[0..2]))}; + next if $$href{$k}->{ref} !~ $buffer[3]; + next if $$href{$k}->{alt} !~ $buffer[4]; + splice(@buffer, 0, 5); + $$href{$k}->{$kname}=join(':', @buffer); + } + close IN; + } +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + my $annovar_dir = 'inc/annovar'; my $rdep = 'inc/R'; diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_samvcf_data_parser.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gfapts/gfap_r1.0_samvcf_data_parser.pl~ Fri Aug 03 05:50:41 2012 -0400 @@ -0,0 +1,183 @@ +#!/usr/bin/perl + +use strict; +# use lib 'inc/perlmod'; +# use ngsutil qw[ :DEFAULT &explode_varcall ]; +use warnings FATAL => qw[ numeric uninitialized ]; +use List::Util qw[ sum min max ]; +use File::Basename; +use Getopt::Long; + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# PATH TO YOUR R-bin DIRECTORY +my $rbin = '/usr/bin/R'; +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# ngsutil.pm +sub explode_varcall{ + my $N=0; + $_=shift @_ foreach my($POS, $REF, $ALT); + $_=$POS foreach my($START, $END); + my(@length, @range, @idx, @VAR, @POS); + @{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS); + push @length, length($_) foreach ($REF, $ALT); + @range=sort{ $a<=>$b } @length; + if($range[0]==1){ + if($range[1]!=1){ + foreach ($REF, $ALT){ + $_=substr($_, 1); + $_=~s/^$/-/; + } + if($length[0]!=1){ + $END+=$length[0]-1; + $START++; + } + } + push @POS, $START, $END; + push @VAR, $REF, $ALT; + }else{ + my @N=(); + undef $_ foreach my ($i, $VAR); + $_-=2 foreach (@length, @range); + $_++ foreach ($START, $END); + $_=substr($_, 1) foreach ($REF, $ALT); + my $indel='-' x ($range[1]-$range[0]); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))? + 0:1) for 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($length[0]<$length[1]){ + @VAR=($VAR); + @N=($N); + $N=0; + undef($VAR); + $VAR.=($_>$range[0])? + ('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))? + 0:1) for reverse 0 .. $range[1]; + $N++ while $VAR =~ /0/g; + if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); } + else{ $REF=$indel . $REF; } + }else{ $ALT.=$indel; } + foreach (qw[ 0 \- ]){ + push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g); + } + @{$_}=() foreach (\@VAR, \@POS); + foreach my $k (@idx){ + push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT); + push @POS, ${$k}[0], sum(@{$k})-1; + } + $_+=$START foreach @POS; + $_=~s/\-+/\-/ foreach @VAR; + for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; } + } + return(\@POS, \@VAR); + } + +sub varscan{ + $_=shift @_ foreach my($kname, $fpath, $href); + my($k, @buffer); + open IN, "<$fpath" or die $!; + while(){ + next if /^#/; + chomp; + @buffer=split /\s+/, $_; + next if !exists $$href{($k=join(':', @buffer[0..2]))}; + next if $$href{$k}->{ref} !~ $buffer[3]; + next if $$href{$k}->{alt} !~ $buffer[4]; + splice(@buffer, 0, 5); + $$href{$k}->{$kname}=join(':', @buffer); + } + close IN; + } +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +my $annovar_dir = 'inc/annovar'; +my $rdep = 'inc/R'; + +my($varfile, $outdir, $outfile, $i, @DP4, @buffer, @Temp, @previous, @fnames, %opts, %chr); + +GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s"); +$varfile = $opts{varfile}; +$outdir = $opts{outdir}; +$outfile = $opts{outfile}; + +my $fname = readlink($varfile) || $varfile; +$fname = basename($fname); + +my %fh=( + 'chr1' => *chr1, 'chr2' => *chr2, 'chr3' => *chr3, 'chr4' => *chr4, 'chr5' => *chr5, + 'chr6' => *chr6, 'chr7' => *chr7, 'chr8' => *chr8, 'chr9' => *chr9, 'chr10' => *chr10, + 'chr11' => *chr11, 'chr12' => *chr12, 'chr13' => *chr13, 'chr14' => *chr14, 'chr15' => *chr15, + 'chr16' => *chr16, 'chr17' => *chr17, 'chr18' => *chr18, 'chr19' => *chr19, 'chr20' => *chr20, + 'chr21' => *chr21, 'chr22' => *chr22, 'chrX' => *chrX, 'chrY' => *chrY, 'chrM' => *chrM +); + +`${annovar_dir}/convert2annovar.pl -format vcf4 $varfile -includeinfo > ${outdir}/${fname}_Temp-00 2> /dev/null` and die $!; + +open($fh{$_}, ">${outdir}/${fname}_${_}.Temp-00") or die $! foreach keys %fh; +open IN, "<${outdir}/${fname}_Temp-00" or die $!; +while(){ + /^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/; + next if !exists $fh{$1}; + if(min(length($2), length($3))!=1){ + chomp; + @buffer=split /\s+/, $_; + @Temp=explode_varcall(@buffer[1,3..4]); + for($i=0; $i<$#{$Temp[0]}; $i+=2){ + print{ $fh{$buffer[0]} } join("\t", $buffer[0], @{$Temp[0]}[$i..$i+1], @{$Temp[1]}[$i..$i+1], @buffer[6..$#buffer]), "\n"; + } + next; + } + print{ $fh{$1} } $_; + $chr{$1}++; + } +close IN; +foreach (keys %fh){ + close($fh{$_}); + next if !exists $chr{$_}; + `sort -k2,2n -k3,3n ${outdir}/${fname}_${_}.Temp-00 > ${outdir}/${fname}_${_}.Temp-01` and die $!; + open IN, "<${outdir}/${fname}_${_}.Temp-01" or die $!; + open OUT, ">${outdir}/${fname}_${_}.Temp-02" or die $!; + $_=readline(IN); + /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/; + @buffer=split /\s+/, $1.$2; + ($_=pop(@buffer))=~s/.+DP4=([^;]+).+/$1/; + @DP4=split /,/, $_; + push @buffer, @DP4; + @previous=@buffer; + MAINLOOP: while(){ + /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/; + @buffer=split /\s+/, $1.$2; + ($_=pop(@buffer))=~s/.+DP4=([^;]+).+/$1/; + @DP4=split /,/, $_; + push @buffer, @DP4; + while(($previous[0] eq $buffer[0]) && ($buffer[2]==$previous[2]+1) && (join('', @previous[3..4]) !~ /-/) && (join('', @buffer[3..4]) !~ /-/)){ + $previous[2]=$buffer[2]; + $previous[$_].=$buffer[$_] for 3..4; + $previous[5]='unk' if $previous[5] ne $buffer[5]; + $previous[7]='SKIP' if $previous[7] ne $buffer[7]; + for (6,8..11){ + $previous[$_]+=$buffer[$_]; + $previous[$_]/=2; + } + next MAINLOOP; + } + $previous[7]='NONE' if $previous[7] eq '.'; + $previous[$_]=sprintf("%.0f", $previous[$_]) for (6,8..11); + print OUT join("\t", @previous[0..6,8..11,7]), "\n"; + @Temp=@previous if eof; + @previous=@buffer; + } + $previous[7]='NONE' if $previous[7] eq '.'; + $previous[$_]=sprintf("%.0f", $previous[$_]) for (6,8..11); + print OUT join("\t", @previous[0..6,8..11,7]), "\n" if(join('_', @Temp[1..2]) ne join('_', @previous[1..2])); + close IN; + close OUT; + } +foreach (1..22, 'X', 'Y', 'M'){ + push @fnames, "${outdir}/${fname}_chr${_}.Temp-02" if exists $chr{"chr$_"}; + } +system join(' ', 'cat', @fnames, '>', "${outdir}/${fname}.Temp.2R") and die $!; +`${rbin} --vanilla --slave --args ${outdir}/${fname}.Temp.2R < ${rdep}/samvcf_data_parser.R` and die $!; +system "rm ${outdir}/${fname}*Temp* $outfile; ln -s ${outdir}/${fname}.var $outfile" and die $!; \ No newline at end of file diff -r f753b30013e6 -r 028f435b6cfb gfapts/gfap_r1.0_samvcf_data_parser.xml