annotate filter_by_list @ 0:f92e6aff30b7 default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:35:07 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f92e6aff30b7 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
#!/usr/bin/env perl

# Report lines of a file that have as one of the column values a value from the pattern file
#
# Positional arguments:
#   1. True|False          - a value starting with "t" or "1" (any case) selects
#                            case-sensitive matching; anything else is case-insensitive
#   2. input.tab           - tab-delimited input file
#   3. file of patterns    - one literal value per line (regex metacharacters are escaped)
#   4. matching output.tab - receives header lines and matching lines
#   5. num header lines    - number of leading lines copied verbatim to the output(s)
#   6. column #            - used directly as the array index into the tab-split
#                            line, i.e. the first column is 0
#   7. nonmatching output  - optional; receives header lines and non-matching lines
#
# A column value matches when one of the patterns occupies the whole value or one
# whole "; "-separated item inside it.
use strict;
use warnings;

@ARGV == 6 or @ARGV == 7 or die "Usage: $0 <True|False (case sensitive)> <input.tab> <file of patterns> <matching output.tab> <num header line to retain> <column #> [nonmatching output.tab]\n";

my ($case_flag, $input_path, $patterns_path, $match_path,
    $num_header_lines, $column, $nonmatch_path) = @ARGV;

# Load the literal patterns; quotemeta keeps them from being read as regex syntax.
open(my $patterns_fh, '<', $patterns_path)
  or die "Cannot open $patterns_path for reading: $!\n";
my @alts;
while (my $pattern = <$patterns_fh>) {
    chomp $pattern;
    push @alts, quotemeta($pattern);
}
close($patterns_fh);

# A pattern must be delimited by the start/end of the value, a tab, or "; ".
# \Z (not \z) is deliberate: the last column still carries the line's newline.
my $regex = "(?:\\A|\\t|; )(?:" . join("|", @alts) . ")(?:; |\\t|\\Z)";
#print STDERR "Regex is $regex\n";

# Compile once, with or without /i, so a single loop serves both modes.
my $matcher = $case_flag =~ /^[t1]/i ? qr/$regex/ : qr/$regex/i;

open(my $out_fh, '>', $match_path)
  or die "Cannot open $match_path for writing: $!\n";
open(my $tab_fh, '<', $input_path)
  or die "Cannot open $input_path for reading: $!\n";
my $nonmatch_fh;
if (defined $nonmatch_path) {
    open($nonmatch_fh, '>', $nonmatch_path)
      or die "Cannot open $nonmatch_path for writing: $!\n";
}

# Lines starting with "#" are always sent to the matching output when the
# requested header count is positive or exactly -1.  This mirrors the original
# script, whose post-decrement header loop left its counter at -1.
# NOTE(review): confirm that keeping "#" lines for positive header counts is intended.
my $keep_comment_lines = ($num_header_lines > 0 || $num_header_lines == -1);

# Copy the header lines to every output that is open.
for (1 .. $num_header_lines) {
    my $header_line = <$tab_fh>;
    last unless defined $header_line;    # input shorter than the header count
    print {$out_fh} $header_line;
    print {$nonmatch_fh} $header_line if defined $nonmatch_fh;
}

while (my $line = <$tab_fh>) {
    my @fields = split /\t/, $line;
    # Rows with too few columns are treated as having an empty value.
    my $value = defined $fields[$column] ? $fields[$column] : '';
    if ($value =~ $matcher or ($keep_comment_lines and $line =~ /^#/)) {
        # print STDERR $value, "\n";
        print {$out_fh} $line;
    }
    elsif (defined $nonmatch_fh) {
        print {$nonmatch_fh} $line;
    }
}

close($tab_fh);
# Buffered write errors only surface at close, so check the output handles.
close($out_fh)
  or die "Cannot close $match_path: $!\n";
if (defined $nonmatch_fh) {
    close($nonmatch_fh)
      or die "Cannot close $nonmatch_path: $!\n";
}