annotate split_hgvs_by_confidence @ 0:1a23ea467feb default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Thu, 26 Mar 2015 09:36:17 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1a23ea467feb intial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
#!/usr/bin/env perl
#
# split_hgvs_by_confidence
#
# Splits a tab-delimited table (first line is a header) into two files
# according to how many entries each row has in its "Sources" column.
#
# Usage:
#   split_hgvs_by_confidence <input.combined.hgvs.txt> <confident_out.hgvs.txt>
#                            <marginal_out.hgvs.txt> <min num sources>
#                            [alt min regex]
#
# A row is written to the "confident" output when either
#   * its Sources cell holds at least <min num sources> entries, where
#     entries are separated by "; ", or
#   * the optional [alt min regex] matches the Sources cell.
# Every other row is written to the "marginal" output.  The header line is
# copied to both outputs, and rows are copied through byte-for-byte.
#
# Dies with a message on: too few arguments, a non-numeric source count, an
# invalid regex, a file that cannot be opened or fully written, empty input,
# or a header without a "Sources" column.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV > 3 or die "Usage: $0 <input.combined.hgvs.txt> <confident_out.hgvs.txt> <marginal_out.hgvs.txt> <min num sources> [alt min regex]\n";

my $infile            = shift @ARGV;
my $confident_outfile = shift @ARGV;
my $marginal_outfile  = shift @ARGV;
my $min_sources       = shift @ARGV;
my $alt_regex         = @ARGV ? shift @ARGV : undef;

# A non-numeric threshold would silently numify to 0 and classify every row
# as confident, so reject it up front.
looks_like_number($min_sources)
    or die "Minimum number of sources must be a number, got '$min_sources'\n";

# Compile the optional pattern once, and report a bad pattern before any
# output file is created.
my $alt_re;
if (defined $alt_regex) {
    $alt_re = eval { qr/$alt_regex/ };
    die "Invalid alternate regex '$alt_regex': $@" if not defined $alt_re;
}

# Three-argument open with lexical handles: the file names are taken
# literally, so characters such as '>' or '|' in a name cannot change the
# open mode.
open(my $in_fh, '<', $infile)
    or die "Cannot open $infile for reading: $!\n";
open(my $confident_fh, '>', $confident_outfile)
    or die "Cannot open $confident_outfile for writing: $!\n";
open(my $marginal_fh, '>', $marginal_outfile)
    or die "Cannot open $marginal_outfile for writing: $!\n";

my $header = <$in_fh>;
defined $header
    or die "Input file $infile is empty (no header line), aborting.\n";
print {$confident_fh} $header;
print {$marginal_fh} $header;

# Parse a copy with the line ending (LF or CRLF) removed so that the last
# column name compares cleanly; the -1 limit keeps trailing empty columns.
(my $header_text = $header) =~ s/\r?\n\z//;
my @headers = split /\t/, $header_text, -1;

# If the header repeats "Sources", the last occurrence wins.
my $srcs_column;
for my $i (0 .. $#headers) {
    $srcs_column = $i if $headers[$i] eq "Sources";
}
die "Cannot find Sources column in header of $infile, aborting.\n" if not defined $srcs_column;

while (my $line = <$in_fh>) {
    # Work on a copy without the line ending; $line itself is printed
    # unchanged so the outputs stay byte-identical to the input rows.
    (my $record = $line) =~ s/\r?\n\z//;

    # The -1 limit preserves trailing empty fields so column indexes line up
    # with the header even when the final cells of a row are empty.
    my @F = split /\t/, $record, -1;

    # Read the column located in the header rather than assuming the
    # Sources column is the last one; a short row counts as having none.
    my $sources_field = defined $F[$srcs_column] ? $F[$srcs_column] : '';
    my @sources = split /; /, $sources_field;

    if (@sources >= $min_sources
        or (defined $alt_re and $sources_field =~ $alt_re)) {
        print {$confident_fh} $line;
    }
    else {
        print {$marginal_fh} $line;
    }
}
close($in_fh);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($confident_fh)
    or die "Cannot finish writing $confident_outfile: $!\n";
close($marginal_fh)
    or die "Cannot finish writing $marginal_outfile: $!\n";