| 
0
 | 
     1 #!/usr/bin/env perl
 | 
| 
 | 
     2 use strict;
 | 
| 
 | 
     3 use warnings;
 | 
| 
 | 
     4 use Getopt::Std;
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 sub parse_command_line();
 | 
| 
 | 
     7 sub build_regex_string();
 | 
| 
 | 
     8 sub usage();
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 my $input_file ;
 | 
| 
 | 
    11 my $output_file;
 | 
| 
 | 
    12 my $find_pattern ;
 | 
| 
 | 
    13 my $replace_pattern ;
 | 
| 
 | 
    14 my $find_complete_words ;
 | 
| 
 | 
    15 my $find_pattern_is_regex ;
 | 
| 
 | 
    16 my $find_in_specific_column ;
 | 
| 
 | 
    17 my $find_case_insensitive ;
 | 
| 
 | 
    18 my $replace_global ;
 | 
| 
 | 
    19 my $skip_first_line ;
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 ##
 | 
| 
 | 
    23 ## Program Start
 | 
| 
 | 
    24 ##
 | 
| 
 | 
    25 usage() if @ARGV<2;
 | 
| 
 | 
    26 parse_command_line();
 | 
| 
 | 
    27 my $regex_string = build_regex_string() ;
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 # Allow first line to pass without filtering?
 | 
| 
 | 
    30 if ( $skip_first_line ) {
 | 
| 
 | 
    31 	my $line = <$input_file>;
 | 
| 
 | 
    32 	print $output_file $line ;
 | 
| 
 | 
    33 }
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 ##
 | 
| 
 | 
    37 ## Main loop
 | 
| 
 | 
    38 ##
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 ## I LOVE PERL (and hate it, at the same time...)
 | 
| 
 | 
    41 ##
 | 
| 
 | 
    42 ## So what's going on with the self-compiling perl code?
 | 
| 
 | 
    43 ##
 | 
| 
 | 
    44 ## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
 | 
| 
 | 
    45 ## 2. If both the find-pattern and replace-pattern are simple strings (not regex), 
 | 
| 
 | 
    46 ##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
 | 
| 
 | 
    47 ## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references)
 | 
| 
 | 
    48 ##    it is still possible to pre-compile the regex and use it in a 's///'
 | 
| 
 | 
    49 ## However,
 | 
| 
 | 
    50 ## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
 | 
| 
 | 
    51 ##    (in perl, you can't precompile a substitute regex).
 | 
| 
 | 
    52 ##    See these examples:
 | 
| 
 | 
    53 ##    http://www.perlmonks.org/?node_id=84420
 | 
| 
 | 
    54 ##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
 | 
| 
 | 
    55 ##
 | 
| 
 | 
    56 ##    The solution:
 | 
| 
 | 
    57 ##    we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
 | 
| 
 | 
    58 ##    Then eval() a new perl code that contains the substitution regex as inlined code.
 | 
| 
 | 
    59 ##    Gotta love perl!
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 my $perl_program ;
 | 
| 
 | 
    62 if ( $find_in_specific_column ) {
 | 
| 
 | 
    63 	# Find & replace in specific column
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 	$perl_program = <<EOF;
 | 
| 
 | 
    66 	while ( <STDIN> ) {
 | 
| 
 | 
    67 		chomp ;
 | 
| 
 | 
    68 		my \@columns = split ;
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 		#not enough columns in this line - skip it
 | 
| 
 | 
    71 		next if ( \@columns < $find_in_specific_column ) ;
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 		\$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 		print STDOUT join("\t", \@columns), "\n" ;
 | 
| 
 | 
    76 	}
 | 
| 
 | 
    77 EOF
 | 
| 
 | 
    78 
 | 
| 
 | 
    79 } else {
 | 
| 
 | 
    80 	# Find & replace the entire line
 | 
| 
 | 
    81 	$perl_program = <<EOF;
 | 
| 
 | 
    82 		while ( <STDIN> ) {
 | 
| 
 | 
    83 			$regex_string ;
 | 
| 
 | 
    84 			print STDOUT;
 | 
| 
 | 
    85 		}
 | 
| 
 | 
    86 EOF
 | 
| 
 | 
    87 }
 | 
| 
 | 
    88 
 | 
| 
 | 
    89 
 | 
| 
 | 
    90 # The dynamic perl code reads from STDIN and writes to STDOUT,
 | 
| 
 | 
    91 # so connect these handles (if the user didn't specifiy input / output
 | 
| 
 | 
    92 # file names, these might be already be STDIN/OUT, so the whole could be a no-op).
 | 
| 
 | 
    93 *STDIN = $input_file ;
 | 
| 
 | 
    94 *STDOUT = $output_file ;
 | 
| 
 | 
    95 eval $perl_program ;
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 
 | 
| 
 | 
    98 ##
 | 
| 
 | 
    99 ## Program end
 | 
| 
 | 
   100 ##
 | 
| 
 | 
   101 
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 sub parse_command_line()
 | 
| 
 | 
   104 {
 | 
| 
 | 
   105 	my %opts ;
 | 
| 
 | 
   106 	getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 	die "$0: missing Find-Pattern argument\n" if (@ARGV==0); 
 | 
| 
 | 
   109 	$find_pattern = $ARGV[0];
 | 
| 
 | 
   110 	die "$0: missing Replace-Pattern argument\n" if (@ARGV==1); 
 | 
| 
 | 
   111 	$replace_pattern = $ARGV[1];
 | 
| 
 | 
   112 
 | 
| 
 | 
   113 	$find_complete_words = ( exists $opts{w} ) ;
 | 
| 
 | 
   114 	$find_case_insensitive = ( exists $opts{i} ) ;
 | 
| 
 | 
   115 	$skip_first_line = ( exists $opts{s} ) ;
 | 
| 
 | 
   116 	$find_pattern_is_regex = ( exists $opts{r} ) ;
 | 
| 
 | 
   117 	$replace_global = ( exists $opts{g} ) ;
 | 
| 
 | 
   118 
 | 
| 
 | 
   119 	# Search in specific column ?
 | 
| 
 | 
   120 	if ( defined $opts{c} ) {
 | 
| 
 | 
   121 		$find_in_specific_column = $opts{c};
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 		die "$0: invalid column number ($find_in_specific_column).\n"
 | 
| 
 | 
   124 			unless $find_in_specific_column =~ /^\d+$/ ;
 | 
| 
 | 
   125 			
 | 
| 
 | 
   126 		die "$0: invalid column number ($find_in_specific_column).\n"
 | 
| 
 | 
   127 			if $find_in_specific_column <= 0; 
 | 
| 
 | 
   128 	}
 | 
| 
 | 
   129 	else {
 | 
| 
 | 
   130 		$find_in_specific_column = 0 ;
 | 
| 
 | 
   131 	}
 | 
| 
 | 
   132 
 | 
| 
 | 
   133 	# Output File specified (instead of STDOUT) ?
 | 
| 
 | 
   134 	if ( defined $opts{o} ) {
 | 
| 
 | 
   135 		my $filename = $opts{o};
 | 
| 
 | 
   136 		open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
 | 
| 
 | 
   137 	} else {
 | 
| 
 | 
   138 		$output_file = *STDOUT ;
 | 
| 
 | 
   139 	}
 | 
| 
 | 
   140 
 | 
| 
 | 
   141 
 | 
| 
 | 
   142 	# Input file Specified (instead of STDIN) ?
 | 
| 
 | 
   143 	if ( @ARGV>2 ) {
 | 
| 
 | 
   144 		my $filename = $ARGV[2];
 | 
| 
 | 
   145 		open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
 | 
| 
 | 
   146 	} else {
 | 
| 
 | 
   147 		$input_file = *STDIN;
 | 
| 
 | 
   148 	}
 | 
| 
 | 
   149 }
 | 
| 
 | 
   150 
 | 
| 
 | 
   151 sub build_regex_string()
 | 
| 
 | 
   152 {
 | 
| 
 | 
   153 	my $find_string ;
 | 
| 
 | 
   154 	my $replace_string ;
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 	if ( $find_pattern_is_regex ) {
 | 
| 
 | 
   157 		$find_string = $find_pattern ;
 | 
| 
 | 
   158 		$replace_string = $replace_pattern ;
 | 
| 
 | 
   159 	} else {
 | 
| 
 | 
   160 		$find_string = quotemeta $find_pattern ;
 | 
| 
 | 
   161 		$replace_string = quotemeta $replace_pattern;
 | 
| 
 | 
   162 	}
 | 
| 
 | 
   163 
 | 
| 
 | 
   164 	if ( $find_complete_words ) {
 | 
| 
 | 
   165 		$find_string = "\\b($find_string)\\b"; 
 | 
| 
 | 
   166 	}
 | 
| 
 | 
   167 
 | 
| 
 | 
   168 	my $regex_string = "s/$find_string/$replace_string/";
 | 
| 
 | 
   169 
 | 
| 
 | 
   170 	$regex_string .= "i" if ( $find_case_insensitive );
 | 
| 
 | 
   171 	$regex_string .= "g" if ( $replace_global ) ;
 | 
| 
 | 
   172 	
 | 
| 
 | 
   173 
 | 
| 
 | 
   174 	return $regex_string;
 | 
| 
 | 
   175 }
 | 
| 
 | 
   176 
 | 
| 
 | 
   177 sub usage()
 | 
| 
 | 
   178 {
 | 
| 
 | 
   179 print <<EOF;
 | 
| 
 | 
   180 
 | 
| 
 | 
   181 Find and Replace
 | 
| 
 | 
   182 Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
 | 
| 
 | 
   183 
 | 
| 
 | 
   184 Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-l] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
 | 
| 
 | 
   185 
 | 
| 
 | 
   186    -g   - Global replace - replace all occurences in line/column. 
 | 
| 
 | 
   187           Default - replace just the first instance.
 | 
| 
 | 
   188    -w   - search for complete words (not partial sub-strings).
 | 
| 
 | 
   189    -i   - case insensitive search.
 | 
| 
 | 
   190    -c N - check only column N, instead of entire line (line split by whitespace).
 | 
| 
 | 
   191    -l   - skip first line (don't replace anything in it)
 | 
| 
 | 
   192    -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
 | 
| 
 | 
   193           usable inside a 's///' statement.
 | 
| 
 | 
   194           By default, they are used as verbatim text strings.
 | 
| 
 | 
   195    -o OUT - specify output file (default = STDOUT).
 | 
| 
 | 
   196    INPUT-FILE - (optional) read from file (default = from STDIN).
 | 
| 
 | 
   197 
 | 
| 
 | 
   198 
 | 
| 
 | 
   199 EOF
 | 
| 
 | 
   200 
 | 
| 
 | 
   201 	exit;
 | 
| 
 | 
   202 }
 |