0
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use warnings;
|
|
4 use Getopt::Std;
|
|
5
|
|
6 sub parse_command_line();
|
|
7 sub build_regex_string();
|
|
8 sub usage();
|
|
9
|
|
10 my $input_file ;
|
|
11 my $output_file;
|
|
12 my $find_pattern ;
|
|
13 my $replace_pattern ;
|
|
14 my $find_complete_words ;
|
|
15 my $find_pattern_is_regex ;
|
|
16 my $find_in_specific_column ;
|
|
17 my $find_case_insensitive ;
|
|
18 my $replace_global ;
|
|
19 my $skip_first_line ;
|
|
20
|
|
21
|
|
22 ##
|
|
23 ## Program Start
|
|
24 ##
|
|
25 usage() if @ARGV<2;
|
|
26 parse_command_line();
|
|
27 my $regex_string = build_regex_string() ;
|
|
28
|
|
29 # Allow first line to pass without filtering?
|
|
30 if ( $skip_first_line ) {
|
|
31 my $line = <$input_file>;
|
|
32 print $output_file $line ;
|
|
33 }
|
|
34
|
|
35
|
|
36 ##
|
|
37 ## Main loop
|
|
38 ##
|
|
39
|
|
40 ## I LOVE PERL (and hate it, at the same time...)
|
|
41 ##
|
|
42 ## So what's going on with the self-compiling perl code?
|
|
43 ##
|
|
44 ## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
|
|
45 ## 2. If both the find-pattern and replace-pattern are simple strings (not regex),
|
|
46 ## it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
|
|
47 ## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references)
|
|
48 ## it is still possible to pre-compile the regex and use it in a 's///'
|
|
49 ## However,
|
|
50 ## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
|
|
51 ## (in perl, you can't precompile a substitute regex).
|
|
52 ## See these examples:
|
|
53 ## http://www.perlmonks.org/?node_id=84420
|
|
54 ## http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
|
|
55 ##
|
|
56 ## The solution:
|
|
57 ## we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
|
|
58 ## Then eval() a new perl code that contains the substitution regex as inlined code.
|
|
59 ## Gotta love perl!
|
|
60
|
|
61 my $perl_program ;
|
|
62 if ( $find_in_specific_column ) {
|
|
63 # Find & replace in specific column
|
|
64
|
|
65 $perl_program = <<EOF;
|
|
66 while ( <STDIN> ) {
|
|
67 chomp ;
|
|
68 my \@columns = split ;
|
|
69
|
|
70 #not enough columns in this line - skip it
|
|
71 next if ( \@columns < $find_in_specific_column ) ;
|
|
72
|
|
73 \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
|
|
74
|
|
75 print STDOUT join("\t", \@columns), "\n" ;
|
|
76 }
|
|
77 EOF
|
|
78
|
|
79 } else {
|
|
80 # Find & replace the entire line
|
|
81 $perl_program = <<EOF;
|
|
82 while ( <STDIN> ) {
|
|
83 $regex_string ;
|
|
84 print STDOUT;
|
|
85 }
|
|
86 EOF
|
|
87 }
|
|
88
|
|
89
|
|
90 # The dynamic perl code reads from STDIN and writes to STDOUT,
|
|
91 # so connect these handles (if the user didn't specifiy input / output
|
|
92 # file names, these might be already be STDIN/OUT, so the whole could be a no-op).
|
|
93 *STDIN = $input_file ;
|
|
94 *STDOUT = $output_file ;
|
|
95 eval $perl_program ;
|
|
96
|
|
97
|
|
98 ##
|
|
99 ## Program end
|
|
100 ##
|
|
101
|
|
102
|
|
103 sub parse_command_line()
|
|
104 {
|
|
105 my %opts ;
|
|
106 getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
|
|
107
|
|
108 die "$0: missing Find-Pattern argument\n" if (@ARGV==0);
|
|
109 $find_pattern = $ARGV[0];
|
|
110 die "$0: missing Replace-Pattern argument\n" if (@ARGV==1);
|
|
111 $replace_pattern = $ARGV[1];
|
|
112
|
|
113 $find_complete_words = ( exists $opts{w} ) ;
|
|
114 $find_case_insensitive = ( exists $opts{i} ) ;
|
|
115 $skip_first_line = ( exists $opts{s} ) ;
|
|
116 $find_pattern_is_regex = ( exists $opts{r} ) ;
|
|
117 $replace_global = ( exists $opts{g} ) ;
|
|
118
|
|
119 # Search in specific column ?
|
|
120 if ( defined $opts{c} ) {
|
|
121 $find_in_specific_column = $opts{c};
|
|
122
|
|
123 die "$0: invalid column number ($find_in_specific_column).\n"
|
|
124 unless $find_in_specific_column =~ /^\d+$/ ;
|
|
125
|
|
126 die "$0: invalid column number ($find_in_specific_column).\n"
|
|
127 if $find_in_specific_column <= 0;
|
|
128 }
|
|
129 else {
|
|
130 $find_in_specific_column = 0 ;
|
|
131 }
|
|
132
|
|
133 # Output File specified (instead of STDOUT) ?
|
|
134 if ( defined $opts{o} ) {
|
|
135 my $filename = $opts{o};
|
|
136 open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
|
|
137 } else {
|
|
138 $output_file = *STDOUT ;
|
|
139 }
|
|
140
|
|
141
|
|
142 # Input file Specified (instead of STDIN) ?
|
|
143 if ( @ARGV>2 ) {
|
|
144 my $filename = $ARGV[2];
|
|
145 open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
|
|
146 } else {
|
|
147 $input_file = *STDIN;
|
|
148 }
|
|
149 }
|
|
150
|
|
151 sub build_regex_string()
|
|
152 {
|
|
153 my $find_string ;
|
|
154 my $replace_string ;
|
|
155
|
|
156 if ( $find_pattern_is_regex ) {
|
|
157 $find_string = $find_pattern ;
|
|
158 $replace_string = $replace_pattern ;
|
|
159 } else {
|
|
160 $find_string = quotemeta $find_pattern ;
|
|
161 $replace_string = quotemeta $replace_pattern;
|
|
162 }
|
|
163
|
|
164 if ( $find_complete_words ) {
|
|
165 $find_string = "\\b($find_string)\\b";
|
|
166 }
|
|
167
|
|
168 my $regex_string = "s/$find_string/$replace_string/";
|
|
169
|
|
170 $regex_string .= "i" if ( $find_case_insensitive );
|
|
171 $regex_string .= "g" if ( $replace_global ) ;
|
|
172
|
|
173
|
|
174 return $regex_string;
|
|
175 }
|
|
176
|
|
177 sub usage()
|
|
178 {
|
|
179 print <<EOF;
|
|
180
|
|
181 Find and Replace
|
|
182 Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
|
|
183
|
|
184 Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-l] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
|
|
185
|
|
186 -g - Global replace - replace all occurences in line/column.
|
|
187 Default - replace just the first instance.
|
|
188 -w - search for complete words (not partial sub-strings).
|
|
189 -i - case insensitive search.
|
|
190 -c N - check only column N, instead of entire line (line split by whitespace).
|
|
191 -l - skip first line (don't replace anything in it)
|
|
192 -r - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
|
|
193 usable inside a 's///' statement.
|
|
194 By default, they are used as verbatim text strings.
|
|
195 -o OUT - specify output file (default = STDOUT).
|
|
196 INPUT-FILE - (optional) read from file (default = from STDIN).
|
|
197
|
|
198
|
|
199 EOF
|
|
200
|
|
201 exit;
|
|
202 }
|