comparison filter_by_list @ 0:f92e6aff30b7 default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:35:07 -0600
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f92e6aff30b7
1 #!/usr/bin/env perl
2
3 # Report lines of a file that have as one of the column values a value from the pattern file
4 @ARGV == 6 or @ARGV == 7 or die "Usage: $0 <True|False (case sensitive)> <input.tab> <file of patterns> <matching output.tab> <num header line to retain> <column #> [nonmatching output.tab]\n";
5
6 open(PATTERNS, $ARGV[2])
7 or die "Cannot open $ARGV[1] for reading: $!\n";
8 my @alts;
9 while(<PATTERNS>){
10 chomp;
11 push @alts, quotemeta($_);
12 }
13 close(PATTERNS);
14
15 my $regex = "(?:\\A|\\t|; )(?:".join("|", @alts).")(?:; |\\t|\\Z)";
16 #print STDERR "Regex is $regex\n";
17 open(OUT, ">$ARGV[3]")
18 or die "Cannot open $ARGV[3] for writing: $!\n";
19 open(TAB, $ARGV[1])
20 or die "Cannot open $ARGV[1] for reading: $!\n";
21 if(@ARGV == 7){
22 open(NONMATCH, ">$ARGV[6]")
23 or die "Cannot open $ARGV[6] for writing: $!\n";
24 }
25 my $num_header_lines = $ARGV[4];
26 if($num_header_lines > 0){
27 while($num_header_lines--){
28 my $header_line = <TAB>;
29 print OUT $header_line;
30 print NONMATCH $header_line if @ARGV == 6;
31 }
32 }
33 if($ARGV[0] =~ /^[t1]/i){
34 my @F = split /\t/, $_;
35 while(<TAB>){
36 if($F[$ARGV[5]] =~ /$regex/o or $num_header_lines == -1 and /^#/){
37 print OUT $_;
38 }
39 elsif(@ARGV == 7){
40 print NONMATCH $_;
41 }
42 }
43 }
44 else{ # case insensitive
45 while(<TAB>){
46 my @F = split /\t/, $_;
47 if($F[$ARGV[5]] =~ /$regex/io or $num_header_lines == -1 and /^#/){
48 # print STDERR $F[$ARGV[5]], "\n";
49 print OUT $_;
50 }
51 elsif(@ARGV == 7){
52 print NONMATCH $_;
53 }
54 }
55 }
56 close(TAB);
57 close(OUT);