Mercurial > repos > yusuf > miseq_bam_variants
diff split_hgvs_by_confidence @ 0:1a23ea467feb default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Thu, 26 Mar 2015 09:36:17 -0600 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env perl
#
# split_hgvs_by_confidence
#
# Splits a tab-delimited table into a "confident" and a "marginal" file,
# based on the value found in each row's "Sources" column.
#
# Arguments (positional):
#   1. input file             - tab-delimited, first line is a header that
#                               must contain a column named "Sources"
#   2. confident output file  - receives the header plus every row that has
#                               at least <min num sources> "; "-separated
#                               entries in its Sources column, or whose
#                               Sources value matches [alt min regex]
#   3. marginal output file   - receives the header plus every other row
#   4. min num sources        - numeric threshold for the source count
#   5. alt min regex          - optional Perl regex; a match on the Sources
#                               value makes the row confident regardless of
#                               the source count
#
# Rows are written out exactly as they were read (line endings included).

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV > 3 or die "Usage: $0 <input.combined.hgvs.txt> <confident_out.hgvs.txt> <marginal_out.hgvs.txt> <min num sources> [alt min regex]\n";

my $infile            = shift @ARGV;
my $confident_outfile = shift @ARGV;
my $marginal_outfile  = shift @ARGV;
my $min_sources       = shift @ARGV;
my $alt_regex         = @ARGV ? shift @ARGV : undef;

# A non-numeric threshold would silently compare as 0 and mark every row
# confident, so reject it up front.
looks_like_number($min_sources)
    or die "Minimum number of sources must be a number, got '$min_sources'\n";

# Compile the optional regex once, before any file is touched, so that a
# malformed pattern is reported immediately instead of at the first match.
# An empty pattern is treated as "not supplied": a match against an empty
# pattern does not mean "match anything" in Perl (it re-uses the last
# successful pattern), which is presumably never what the caller wants.
my $alt_re;
if (defined $alt_regex and length $alt_regex) {
    $alt_re = eval { qr/$alt_regex/ };
    defined $alt_re
        or die "Invalid alt min regex '$alt_regex': $@";
}

# Three-argument open with lexical handles: the file names are taken
# literally, so characters such as '>' or '|' in a name cannot alter the mode.
open(my $in, '<', $infile)
    or die "Cannot open $infile for reading: $!\n";
open(my $confident, '>', $confident_outfile)
    or die "Cannot open $confident_outfile for writing: $!\n";
open(my $marginal, '>', $marginal_outfile)
    or die "Cannot open $marginal_outfile for writing: $!\n";

my $header = <$in>;
defined $header
    or die "Input file $infile is empty, aborting.\n";
print {$confident} $header;
print {$marginal} $header;

# Locate the Sources column. The line terminator (LF or CRLF) is stripped
# first so that it cannot hide the column name when it is the last one.
(my $header_text = $header) =~ s/\r?\n\z//;
my @headers = split /\t/, $header_text, -1;
my $srcs_column;
for my $i (0 .. $#headers) {
    # No early exit: if the name is repeated, the last occurrence wins.
    $srcs_column = $i if $headers[$i] eq "Sources";
}
die "Cannot find Sources column in header of $infile, aborting.\n" if not defined $srcs_column;

while (my $line = <$in>) {
    # Parse a terminator-free copy; $line itself is echoed unchanged.
    (my $record = $line) =~ s/\r?\n\z//;

    # LIMIT of -1 keeps trailing empty fields, so column positions stay
    # aligned with the header even when the last columns are blank.
    my @fields = split /\t/, $record, -1;

    # Read the column identified from the header rather than assuming that
    # Sources is the last field; short rows yield an empty value.
    my $sources_field = defined $fields[$srcs_column] ? $fields[$srcs_column] : '';
    my @sources = split /; /, $sources_field;

    if (@sources >= $min_sources
        or (defined $alt_re and $sources_field =~ $alt_re)) {
        print {$confident} $line;
    }
    else {
        print {$marginal} $line;
    }
}

close($in);
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked for the output files.
close($confident)
    or die "Cannot finish writing $confident_outfile: $!\n";
close($marginal)
    or die "Cannot finish writing $marginal_outfile: $!\n";