0
|
1 #!/usr/bin/env perl
|
|
2
|
|
3 # Report lines of a file that have as one of the column values a value from the pattern file
|
|
4 @ARGV == 6 or @ARGV == 7 or die "Usage: $0 <True|False (case sensitive)> <input.tab> <file of patterns> <matching output.tab> <num header line to retain> <column #> [nonmatching output.tab]\n";
|
|
5
|
|
6 open(PATTERNS, $ARGV[2])
|
|
7 or die "Cannot open $ARGV[1] for reading: $!\n";
|
|
8 my @alts;
|
|
9 while(<PATTERNS>){
|
|
10 chomp;
|
|
11 push @alts, quotemeta($_);
|
|
12 }
|
|
13 close(PATTERNS);
|
|
14
|
|
15 my $regex = "(?:\\A|\\t|; )(?:".join("|", @alts).")(?:; |\\t|\\Z)";
|
|
16 #print STDERR "Regex is $regex\n";
|
|
17 open(OUT, ">$ARGV[3]")
|
|
18 or die "Cannot open $ARGV[3] for writing: $!\n";
|
|
19 open(TAB, $ARGV[1])
|
|
20 or die "Cannot open $ARGV[1] for reading: $!\n";
|
|
21 if(@ARGV == 7){
|
|
22 open(NONMATCH, ">$ARGV[6]")
|
|
23 or die "Cannot open $ARGV[6] for writing: $!\n";
|
|
24 }
|
|
25 my $num_header_lines = $ARGV[4];
|
|
26 if($num_header_lines > 0){
|
|
27 while($num_header_lines--){
|
|
28 my $header_line = <TAB>;
|
|
29 print OUT $header_line;
|
|
30 print NONMATCH $header_line if @ARGV == 6;
|
|
31 }
|
|
32 }
|
|
33 if($ARGV[0] =~ /^[t1]/i){
|
|
34 my @F = split /\t/, $_;
|
|
35 while(<TAB>){
|
|
36 if($F[$ARGV[5]] =~ /$regex/o or $num_header_lines == -1 and /^#/){
|
|
37 print OUT $_;
|
|
38 }
|
|
39 elsif(@ARGV == 7){
|
|
40 print NONMATCH $_;
|
|
41 }
|
|
42 }
|
|
43 }
|
|
44 else{ # case insensitive
|
|
45 while(<TAB>){
|
|
46 my @F = split /\t/, $_;
|
|
47 if($F[$ARGV[5]] =~ /$regex/io or $num_header_lines == -1 and /^#/){
|
|
48 # print STDERR $F[$ARGV[5]], "\n";
|
|
49 print OUT $_;
|
|
50 }
|
|
51 elsif(@ARGV == 7){
|
|
52 print NONMATCH $_;
|
|
53 }
|
|
54 }
|
|
55 }
|
|
56 close(TAB);
|
|
57 close(OUT);
|