Mercurial > repos > dereeper > sniplay3
view FilterVCFonAnnotations.pl @ 2:15319113c0a5 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 12 Feb 2015 15:54:24 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Getopt::Long; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: -i, --input <VCF input> -o, --out <VCF output> <opts> are: -g, --genelist <file listing the genes to be filtered> -f, --feature <genomic feature: Exon,INTRON,UTR_3_PRIME,UTR_5_PRIME,DOWNSTREAM,UPSTREAM or INTERGENIC> -s, --syn <keep only synonymous (s), non-synonymous (n)> ~; $usage .= "\n"; my ($input,$out); my $filter_position = ""; my $filter_synonyme = ""; my $genelist; GetOptions( "input=s" => \$input, "out=s" => \$out, "genelist=s" => \$genelist, "feature=s" => \$filter_position, "syn=s" => \$filter_synonyme, ); die $usage if ( !$input || !$out); open(my $O1,">$out.header"); my $header = ""; open(my $VCF,$input); while(<$VCF>) { my $line = $_; chomp($line); my @infos = split(/\t/,$line); if (scalar @infos > 9) { if (!/^#/) { last; } } print $O1 $_; } close($O1); if ($filter_synonyme =~/\w/) { if ($filter_synonyme eq "s") { system("grep ',SYNONYMOUS_' $input >$out.f1"); system("grep '=SYNONYMOUS_' $input >$out.f2"); } if ($filter_synonyme eq "n") { system("grep ',NON_SYNONYMOUS_' $input >$out.f1"); system("grep '=NON_SYNONYMOUS_' $input >$out.f2"); } system("cat $out.header >$out.r1"); system("cat $out.f1 >>$out.r1"); system("cat $out.f2 >>$out.r1"); } else { system("cp -rf $input $out.r1"); } if ($filter_position =~/\w+/) { system("grep $filter_position $out.r1 >$out.f3"); system("cat $out.header >>$out.r2"); system("cat $out.f3 >>$out.r2"); } else { system("cp -rf $out.r1 $out.r2"); } if ($genelist) { my %genes_to_be_analyzed; open(my $F,$genelist); while(<$F>) { my $line = $_; $line =~s/\n//g; $line =~s/\r//g; $genes_to_be_analyzed{$line} = 1; } close($F); my $num_line = 0; open(my $OUT,">$out"); open(my $VCF,"$out.r2"); while(<$VCF>) { my $line = $_; chomp($line); my @infos = split(/\t/,$line); if (scalar @infos > 9) { if (!/^#/) { $num_line++; my $chromosome = $infos[0]; my $chromosome_position = $infos[1]; my $ref_allele = $infos[3]; my $alt_allele = $infos[4]; my $quality = $infos[6]; my $info = $infos[7]; my $is_in_exon = "#"; my $is_synonyme = "#"; my $gene = "intergenic"; my $modif_codon = "#"; my $modif_aa = "#"; my $geneposition; if ($info =~/EFF=(.*)/) { my @annotations = split(",",$1); foreach my $annotation(@annotations) { my ($syn, $additional) = split(/\(/,$annotation); $is_in_exon = "intron"; if ($syn =~/UTR/) { $is_in_exon = $syn; } else { $is_synonyme = $syn; } my @infos_additional = split(/\|/,$additional); $gene = $infos_additional[4]; } } if (!$genes_to_be_analyzed{$gene}){next;} } } print $OUT $_; } close($VCF); close($OUT); } else { system("cp $out.r2 $out"); } unlink("$out.f1"); unlink("$out.f2"); unlink("$out.f3"); unlink("$out.r1"); unlink("$out.r2"); unlink("$out.header");