Mercurial > repos > dereeper > sniplay3
diff FilterVCFonAnnotations.pl @ 2:15319113c0a5 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 12 Feb 2015 15:54:24 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FilterVCFonAnnotations.pl Thu Feb 12 15:54:24 2015 -0500 @@ -0,0 +1,171 @@ +#!/usr/bin/perl + +use strict; +use Getopt::Long; + +my $usage = qq~Usage:$0 <args> [<opts>] + +where <args> are: + + -i, --input <VCF input> + -o, --out <VCF output> + + <opts> are: + + -g, --genelist <file listing the genes to be filtered> + -f, --feature <genomic feature: Exon,INTRON,UTR_3_PRIME,UTR_5_PRIME,DOWNSTREAM,UPSTREAM or INTERGENIC> + -s, --syn <keep only synonymous (s), non-synonymous (n)> +~; +$usage .= "\n"; + +my ($input,$out); + +my $filter_position = ""; +my $filter_synonyme = ""; + +my $genelist; + +GetOptions( + "input=s" => \$input, + "out=s" => \$out, + "genelist=s" => \$genelist, + "feature=s" => \$filter_position, + "syn=s" => \$filter_synonyme, +); + + +die $usage + if ( !$input || !$out); + + +open(my $O1,">$out.header"); +my $header = ""; +open(my $VCF,$input); +while(<$VCF>) +{ + my $line = $_; + chomp($line); + my @infos = split(/\t/,$line); + + if (scalar @infos > 9) + { + if (!/^#/) + { + last; + } + } + print $O1 $_; +} +close($O1); + +if ($filter_synonyme =~/\w/) +{ + if ($filter_synonyme eq "s") + { + system("grep ',SYNONYMOUS_' $input >$out.f1"); + system("grep '=SYNONYMOUS_' $input >$out.f2"); + } + if ($filter_synonyme eq "n") + { + system("grep ',NON_SYNONYMOUS_' $input >$out.f1"); + system("grep '=NON_SYNONYMOUS_' $input >$out.f2"); + } + system("cat $out.header >$out.r1"); + system("cat $out.f1 >>$out.r1"); + system("cat $out.f2 >>$out.r1"); +} +else +{ + system("cp -rf $input $out.r1"); +} +if ($filter_position =~/\w+/) +{ + system("grep $filter_position $out.r1 >$out.f3"); + system("cat $out.header >>$out.r2"); + system("cat $out.f3 >>$out.r2"); +} +else +{ + system("cp -rf $out.r1 $out.r2"); +} + +if ($genelist) +{ + my %genes_to_be_analyzed; + open(my $F,$genelist); + while(<$F>) + { + my $line = $_; + $line =~s/\n//g; + $line =~s/\r//g; + $genes_to_be_analyzed{$line} = 1; + } + close($F); + + my $num_line = 0; + open(my $OUT,">$out"); + open(my $VCF,"$out.r2"); + while(<$VCF>) + { + my $line = $_; + chomp($line); + my @infos = split(/\t/,$line); + + if (scalar @infos > 9) + { + if (!/^#/) + { + $num_line++; + + my $chromosome = $infos[0]; + my $chromosome_position = $infos[1]; + my $ref_allele = $infos[3]; + my $alt_allele = $infos[4]; + my $quality = $infos[6]; + my $info = $infos[7]; + my $is_in_exon = "#"; + my $is_synonyme = "#"; + my $gene = "intergenic"; + my $modif_codon = "#"; + my $modif_aa = "#"; + my $geneposition; + if ($info =~/EFF=(.*)/) + { + my @annotations = split(",",$1); + foreach my $annotation(@annotations) + { + my ($syn, $additional) = split(/\(/,$annotation); + + + $is_in_exon = "intron"; + if ($syn =~/UTR/) + { + $is_in_exon = $syn; + } + else + { + $is_synonyme = $syn; + } + my @infos_additional = split(/\|/,$additional); + $gene = $infos_additional[4]; + } + } + + if (!$genes_to_be_analyzed{$gene}){next;} + } + } + print $OUT $_; + } + close($VCF); + close($OUT); +} +else +{ + system("cp $out.r2 $out"); +} +unlink("$out.f1"); +unlink("$out.f2"); +unlink("$out.f3"); +unlink("$out.r1"); +unlink("$out.r2"); +unlink("$out.header");