#!/usr/bin/env perl

# Splits a tab-delimited table (the *.hgvs.txt files named in the usage
# message) into two output files based on its "Sources" column:
#
#   * rows whose Sources field holds at least <min num sources> entries
#     (entries are separated by "; "), or whose Sources field matches the
#     optional [alt min regex], are written to the "confident" file;
#   * every other row is written to the "marginal" file.
#
# The header line of the input is copied verbatim to both output files.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV > 3 or die "Usage: $0 <input.combined.hgvs.txt> <confident_out.hgvs.txt> <marginal_out.hgvs.txt> <min num sources> [alt min regex]\n";

my $infile            = shift @ARGV;
my $confident_outfile = shift @ARGV;
my $marginal_outfile  = shift @ARGV;
my $min_sources       = shift @ARGV;
my $alt_regex         = @ARGV ? shift @ARGV : undef;

# The threshold is compared numerically below; reject anything that would
# silently be treated as 0 and send every row to the confident file.
looks_like_number($min_sources)
    or die "Minimum number of sources must be numeric, got '$min_sources'\n";

# Compile the optional user-supplied pattern once, up front, so that a
# malformed pattern is reported before any output file is touched.
my $alt_re;
if (defined $alt_regex) {
    $alt_re = eval { qr/$alt_regex/ };
    die "Invalid alt min regex '$alt_regex': $@" if not defined $alt_re;
}

# Three-argument open with lexical handles: the file names are taken from
# the command line and must never be interpreted as an open mode or a pipe.
open(my $in, '<', $infile)
    or die "Cannot open $infile for reading: $!\n";
open(my $confident, '>', $confident_outfile)
    or die "Cannot open $confident_outfile for writing: $!\n";
open(my $marginal, '>', $marginal_outfile)
    or die "Cannot open $marginal_outfile for writing: $!\n";

my $header = <$in>;
defined $header
    or die "Cannot read header line from $infile, aborting.\n";
print $confident $header;
print $marginal $header;
chomp $header;

# Locate the Sources column (if the name occurs more than once, the last
# occurrence wins).
my @headers = split /\t/, $header;
my $srcs_column;
for my $i (0 .. $#headers) {
    $srcs_column = $i if $headers[$i] eq "Sources";
}
die "Cannot find Sources column in header of $infile, aborting.\n" if not defined $srcs_column;

while (my $line = <$in>) {
    # Work on a chomped copy so that the line terminator never becomes part
    # of a field; the original line is what gets written out.
    my $record = $line;
    chomp $record;

    # Limit -1 keeps trailing empty fields so column positions stay aligned
    # with the header.
    my @fields = split /\t/, $record, -1;

    # Rows that are too short to reach the Sources column have no sources.
    my $sources_field = defined $fields[$srcs_column] ? $fields[$srcs_column] : '';
    my @sources = split /; /, $sources_field;

    if (@sources >= $min_sources
        or (defined $alt_re and $sources_field =~ $alt_re)) {
        print $confident $line;
    }
    else {
        print $marginal $line;
    }
}

close($in);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($confident)
    or die "Cannot close $confident_outfile after writing: $!\n";
close($marginal)
    or die "Cannot close $marginal_outfile after writing: $!\n";