Mercurial > repos > yusuf > filter_table_by_names
comparison filter_by_list @ 0:f92e6aff30b7 default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:35:07 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f92e6aff30b7 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 # Report lines of a file that have as one of the column values a value from the pattern file | |
4 @ARGV == 6 or @ARGV == 7 or die "Usage: $0 <True|False (case sensitive)> <input.tab> <file of patterns> <matching output.tab> <num header line to retain> <column #> [nonmatching output.tab]\n"; | |
5 | |
6 open(PATTERNS, $ARGV[2]) | |
7 or die "Cannot open $ARGV[1] for reading: $!\n"; | |
8 my @alts; | |
9 while(<PATTERNS>){ | |
10 chomp; | |
11 push @alts, quotemeta($_); | |
12 } | |
13 close(PATTERNS); | |
14 | |
15 my $regex = "(?:\\A|\\t|; )(?:".join("|", @alts).")(?:; |\\t|\\Z)"; | |
16 #print STDERR "Regex is $regex\n"; | |
17 open(OUT, ">$ARGV[3]") | |
18 or die "Cannot open $ARGV[3] for writing: $!\n"; | |
19 open(TAB, $ARGV[1]) | |
20 or die "Cannot open $ARGV[1] for reading: $!\n"; | |
21 if(@ARGV == 7){ | |
22 open(NONMATCH, ">$ARGV[6]") | |
23 or die "Cannot open $ARGV[6] for writing: $!\n"; | |
24 } | |
25 my $num_header_lines = $ARGV[4]; | |
26 if($num_header_lines > 0){ | |
27 while($num_header_lines--){ | |
28 my $header_line = <TAB>; | |
29 print OUT $header_line; | |
30 print NONMATCH $header_line if @ARGV == 6; | |
31 } | |
32 } | |
33 if($ARGV[0] =~ /^[t1]/i){ | |
34 my @F = split /\t/, $_; | |
35 while(<TAB>){ | |
36 if($F[$ARGV[5]] =~ /$regex/o or $num_header_lines == -1 and /^#/){ | |
37 print OUT $_; | |
38 } | |
39 elsif(@ARGV == 7){ | |
40 print NONMATCH $_; | |
41 } | |
42 } | |
43 } | |
44 else{ # case insensitive | |
45 while(<TAB>){ | |
46 my @F = split /\t/, $_; | |
47 if($F[$ARGV[5]] =~ /$regex/io or $num_header_lines == -1 and /^#/){ | |
48 # print STDERR $F[$ARGV[5]], "\n"; | |
49 print OUT $_; | |
50 } | |
51 elsif(@ARGV == 7){ | |
52 print NONMATCH $_; | |
53 } | |
54 } | |
55 } | |
56 close(TAB); | |
57 close(OUT); |