Mercurial > repos > yusuf > filter_table_by_names
view filter_by_list @ 0:f92e6aff30b7 default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:35:07 -0600 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl # Report lines of a file that have as one of the column values a value from the pattern file @ARGV == 6 or @ARGV == 7 or die "Usage: $0 <True|False (case sensitive)> <input.tab> <file of patterns> <matching output.tab> <num header line to retain> <column #> [nonmatching output.tab]\n"; open(PATTERNS, $ARGV[2]) or die "Cannot open $ARGV[1] for reading: $!\n"; my @alts; while(<PATTERNS>){ chomp; push @alts, quotemeta($_); } close(PATTERNS); my $regex = "(?:\\A|\\t|; )(?:".join("|", @alts).")(?:; |\\t|\\Z)"; #print STDERR "Regex is $regex\n"; open(OUT, ">$ARGV[3]") or die "Cannot open $ARGV[3] for writing: $!\n"; open(TAB, $ARGV[1]) or die "Cannot open $ARGV[1] for reading: $!\n"; if(@ARGV == 7){ open(NONMATCH, ">$ARGV[6]") or die "Cannot open $ARGV[6] for writing: $!\n"; } my $num_header_lines = $ARGV[4]; if($num_header_lines > 0){ while($num_header_lines--){ my $header_line = <TAB>; print OUT $header_line; print NONMATCH $header_line if @ARGV == 6; } } if($ARGV[0] =~ /^[t1]/i){ my @F = split /\t/, $_; while(<TAB>){ if($F[$ARGV[5]] =~ /$regex/o or $num_header_lines == -1 and /^#/){ print OUT $_; } elsif(@ARGV == 7){ print NONMATCH $_; } } } else{ # case insensitive while(<TAB>){ my @F = split /\t/, $_; if($F[$ARGV[5]] =~ /$regex/io or $num_header_lines == -1 and /^#/){ # print STDERR $F[$ARGV[5]], "\n"; print OUT $_; } elsif(@ARGV == 7){ print NONMATCH $_; } } } close(TAB); close(OUT);