view split_hgvs_by_confidence @ 0:1a23ea467feb default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Thu, 26 Mar 2015 09:36:17 -0600
parents
children
line wrap: on
line source

#!/usr/bin/env perl
#
# Split a tab-delimited table into a "confident" and a "marginal" output file
# based on the contents of its "Sources" column.
#
# The first input line is a header; it is copied to both output files and is
# searched for a column named exactly "Sources".  For every following line the
# Sources field is split on "; ".  A line is written to the confident file when
# it has at least <min num sources> entries, or when the optional
# [alt min regex] matches the Sources field; otherwise it is written to the
# marginal file.  Lines are copied through unmodified.
#
# Dies with a message when too few arguments are given, when the minimum is not
# numeric, when a file cannot be opened or closed, or when the header has no
# Sources column.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV > 3 or die "Usage: $0 <input.combined.hgvs.txt> <confident_out.hgvs.txt> <marginal_out.hgvs.txt> <min num sources> [alt min regex]\n";

my $infile = shift @ARGV;
my $confident_outfile = shift @ARGV;
my $marginal_outfile = shift @ARGV;
my $min_sources = shift @ARGV;
my $alt_regex = @ARGV ? shift @ARGV : undef;

# A non-numeric minimum would silently compare as 0 and mark every line
# confident, so reject it up front.
looks_like_number($min_sources)
  or die "Minimum number of sources must be numeric, got '$min_sources'\n";

# Compile the user-supplied pattern once rather than on every line.
my $alt_pattern = defined $alt_regex ? qr/$alt_regex/ : undef;

open(my $in_fh, '<', $infile)
  or die "Cannot open $infile for reading: $!\n";
open(my $confident_fh, '>', $confident_outfile)
  or die "Cannot open $confident_outfile for writing: $!\n";
open(my $marginal_fh, '>', $marginal_outfile)
  or die "Cannot open $marginal_outfile for writing: $!\n";

my $header = <$in_fh>;
# An empty input file has no header line, hence no Sources column.
defined $header
  or die "Cannot find Sources column in header of $infile, aborting.\n";
print {$confident_fh} $header;
print {$marginal_fh} $header;

# Strip the line ending from a copy only, so the final column name compares
# cleanly; the limit of -1 keeps trailing empty columns so indexes line up.
(my $header_record = $header) =~ s/\r?\n\z//;
my @headers = split /\t/, $header_record, -1;
my $srcs_column;
for my $i (0 .. $#headers) {
  # No early exit: if the name is repeated, the last matching column is used.
  $srcs_column = $i if $headers[$i] eq "Sources";
}
die "Cannot find Sources column in header of $infile, aborting.\n" if not defined $srcs_column;

while (my $line = <$in_fh>) {
  # Parse a copy without its line ending; the original line is what gets printed.
  (my $record = $line) =~ s/\r?\n\z//;
  my @fields = split /\t/, $record, -1;

  # Read the column located in the header (not simply the last column).
  # Lines too short to reach that column are treated as having no sources.
  my $sources_field = defined $fields[$srcs_column] ? $fields[$srcs_column] : '';
  my @sources = split /; /, $sources_field;

  my $is_confident = @sources >= $min_sources
    || (defined $alt_pattern && $sources_field =~ $alt_pattern);

  print { $is_confident ? $confident_fh : $marginal_fh } $line;
}

close($in_fh);
# Buffered write errors (e.g. a full disk) only surface at close time.
close($confident_fh)
  or die "Cannot close $confident_outfile: $!\n";
close($marginal_fh)
  or die "Cannot close $marginal_outfile: $!\n";