0
+ − 1 #!/usr/bin/env perl
+ − 2 ##
+ − 3 ## Sort-header - wrapper for GNU sort with header-line support
+ − 4 ##
+ − 5 ## Copyright(C) A. Gordon
+ − 6 ## license AGPLv3+
+ − 7 ##
+ − 8 use strict;
+ − 9 use warnings;
+ − 10 use Data::Dumper;
+ − 11 use IO::Handle;
+ − 12 use Getopt::Long qw(:config bundling no_ignore_case_always);
+ − 13
##
## Forward declarations.
## Each prototype here is repeated on the matching definition further down.
##
sub add_standard_sort_param(@);
sub add_standard_sort_param_value(@);
sub forbidden_sort_param(@);
sub show_help();
sub show_version();
sub show_examples();
sub parse_commandline_options();
sub reassign_input_output();
sub process_header_lines();
sub run_sort();
sub read_line_non_buffered();


##
## Program identity (interpolated into help, version and error messages)
##
my $PROGRAM = "sort-header";
my $VERSION = 0.4;

##
## Runtime options (filled in by parse_commandline_options)
##
my $check_only;              # starts undefined
my $input_file;              # undefined => keep reading STDIN
my $output_file;             # undefined => keep writing STDOUT
my $field_separator;         # starts undefined
my $header_lines = 1;        # number of leading lines copied through unsorted
my $debug;                   # set by --debugheader
my $sort_exit_code = 1;      # by default, assume some error

## Options collected for the 'sort' child process, in command-line order
my @sort_options;

##
## Program Start
##
parse_commandline_options();
reassign_input_output();
process_header_lines();
run_sort();
exit $sort_exit_code;
##
## Program End
##
+ − 55
##
## Print a worked usage example to STDOUT, then terminate with exit status 0.
## Used as the handler for the --examples option.
##
sub show_examples()
{
    # Interpolating heredoc: the backslash-escaped dollar signs print as a
    # literal shell prompt.
    my $examples = <<"EOF";
Sorting a file with a header line:

\$ cat input.txt
Fruit Color Price
Banana Yellow 4.1
Avocado Green 8.0
Apple Red 3.0
Melon Green 6.1

# By default, 'sort-header' assumes 1 header line
# (no need to use --header in this case).

\$ sort-header -k3,3nr input.txt
Fruit Color Price
Avocado Green 8.0
Melon Green 6.1
Banana Yellow 4.1
Apple Red 3.0

EOF
    print $examples;
    exit 0;
}
+ − 81
##
## Print the usage screen to STDOUT, then terminate with exit status 0.
## Used as the handler for the --help option.
##
## The list of rejected GNU sort options below mirrors the options that
## parse_commandline_options() routes to forbidden_sort_param().
##
sub show_help()
{
    print<<EOF;
${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines.

Usage: ${PROGRAM} [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE]

HEADER-OPTIONS: the following options are supported by '${PROGRAM}':

--header N = Treat the first N lines as header lines.
These lines will NOT be sorted. They will be passed
directly to the output file. (default: 1)

--version = Print ${PROGRAM}'s version.

--debugheader = Print debug messages (relating to ${PROGRAM}'s operation).

--help = Show this help screen.

--examples = Show usage examples.

GNU sort options:
Most of the standard GNU sort options are supported and passed to GNU sort.
The following options can not be used with '${PROGRAM}':

-m --merge => ${PROGRAM} can only sort one file, not merge multiple files.
-c -C --check => Currently not supported
--files0-from => Currently not supported
--batch-size => Currently not supported
-z --zero-terminated => Currently not supported

INPUT-FILE:
If INPUT-FILE is not specified, ${PROGRAM} will use STDIN (just like GNU sort).

EOF
    exit(0);
}
+ − 118
##
## Print the program name, version and license notice to STDOUT, then
## terminate with exit status 0. Used as the handler for the --version option.
##
sub show_version()
{
    # Interpolating heredoc: the at-sign in the e-mail address stays escaped.
    my $banner = <<"EOF";
$PROGRAM $VERSION
Copyright (C) 2010 A. Gordon (gordon\@cshl.edu)
License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)

To see the GNU's sort version, run:
sort --version
EOF
    print $banner;
    exit 0;
}
+ − 131
##
## Parse @ARGV.
##
## - GNU sort options that are safe to forward are appended to @sort_options
##   by the add_standard_sort_param* callbacks.
## - GNU sort options that this wrapper cannot honour abort the program via
##   forbidden_sort_param().
## - Wrapper-specific options set $header_lines, $output_file and $debug.
## - At most one non-option argument (the input file) is accepted; it is stored
##   in $input_file. A lone "-" is the GNU sort spelling of "read standard
##   input", so it leaves $input_file undefined.
##
## Exits with status 1 when Getopt::Long reports a parsing error, and dies on
## a negative --header value or on more than one input file.
##
sub parse_commandline_options()
{
    my $rc = GetOptions(
        ## Ordering options - forwarded to sort
        "ignore-leading-blanks|b" => \&add_standard_sort_param,
        "dictionary-order|d"      => \&add_standard_sort_param,
        "ignore-case|f"           => \&add_standard_sort_param,
        "general-numeric-sort|g"  => \&add_standard_sort_param,
        "ignore-nonprinting|i"    => \&add_standard_sort_param,
        "month-sort|M"            => \&add_standard_sort_param,
        "human-numeric-sort|h"    => \&add_standard_sort_param,
        "numeric-sort|n"          => \&add_standard_sort_param,
        "random-source=s"         => \&add_standard_sort_param_value,
        "random-sort|R"           => \&add_standard_sort_param,
        "reverse|r"               => \&add_standard_sort_param,
        "sort=s"                  => \&add_standard_sort_param_value,
        "version-sort|V"          => \&add_standard_sort_param,

        ## Other sort options - forwarded or rejected
        "check|c"                 => \&forbidden_sort_param,
        "C"                       => \&forbidden_sort_param,
        "compress-program=s"      => \&add_standard_sort_param_value,
        "debug"                   => \&add_standard_sort_param,

        "files0-from=s"           => \&forbidden_sort_param,

        "key|k=s"                 => \&add_standard_sort_param_value,
        "merge|m"                 => \&forbidden_sort_param,
        "batch-size=i"            => \&forbidden_sort_param,

        "parallel=i"              => \&add_standard_sort_param_value,

        ## Handled by this program (see reassign_input_output), not by sort
        "output|o=s"              => \$output_file,

        "stable|s"                => \&add_standard_sort_param,
        "buffer-size|S=s"         => \&add_standard_sort_param_value,

        "field-separator|t=s"     => \&add_standard_sort_param_value,
        "temporary-directory|T=s" => \&add_standard_sort_param_value,
        "unique|u"                => \&add_standard_sort_param,

        "zero-terminated|z"       => \&forbidden_sort_param,

        ## Informational options - each handler prints and exits
        "help"                    => \&show_help,
        "version"                 => \&show_version,
        "examples"                => \&show_examples,

        ## Wrapper-specific options
        "header=i"                => \$header_lines,
        "debugheader"             => \$debug,
    );

    exit 1 unless $rc;

    # Whatever GetOptions left in @ARGV are the non-option arguments.
    my @INPUT_FILES = @ARGV;

    die "$PROGRAM: error: invalid number of header lines ($header_lines)\n" unless $header_lines>=0;
    die "$PROGRAM: error: Multiple input files specified. This program can sort only a single file.\n" if (scalar(@INPUT_FILES)>1);

    if (scalar(@INPUT_FILES)==1) {
        my $candidate = shift @INPUT_FILES;
        # "-" means standard input: keep $input_file undefined so STDIN is
        # left untouched instead of trying to open a file literally named "-".
        $input_file = $candidate unless $candidate eq '-';
    }

    if ($debug) {
        warn "$PROGRAM: number of header lines = $header_lines\n";
        warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n";
    }
}
+ − 194
##
## Re-point STDIN and/or STDOUT at the files requested on the command line,
## so that both the header copy done by this program and the 'sort' child
## process started afterwards use them.
##
## Dies with a message naming the file when it cannot be opened or when the
## standard handle cannot be re-assigned.
##
## The input file is opened first, so an unreadable input does not leave
## behind a freshly truncated output file.
##
## NOTE(review): the output file is truncated as soon as it is opened, before
## any input is read. Passing the same path as INPUT-FILE and as -o therefore
## loses the data, unlike GNU sort's own -o. Not addressed here.
##
sub reassign_input_output()
{
    # Test defined/length rather than plain truth, so that a file literally
    # named "0" is honoured instead of being silently ignored.
    if (defined $input_file && length $input_file) {
        warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug;
        open my $in_fh, '<', $input_file
            or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n";
        # fdopen() duplicates the descriptor onto STDIN, so the lexical handle
        # can be released when this block ends.
        STDIN->fdopen($in_fh, 'r')
            or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n";
    }

    if (defined $output_file && length $output_file) {
        warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug;
        open my $out_fh, '>', $output_file
            or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n";
        STDOUT->fdopen($out_fh, 'w')
            or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n";
    }
}
+ − 210
##
## Copy the first $header_lines lines from STDIN to STDOUT untouched.
## If the input ends before that many lines were read, the program exits
## right away (plain 'exit', i.e. status 0) and 'sort' is never started.
##
sub process_header_lines()
{
    warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug;

    my $remaining = $header_lines;
    while ($remaining-- > 0) {
        my $header = read_line_non_buffered();
        exit unless defined $header;    # input exhausted: nothing left to sort
        print $header;
    }
}
+ − 220
##
## Run 'sort' with the collected @sort_options and wait for it to finish.
## No file argument is passed, so the child works on the inherited standard
## handles. On a normal exit, its exit status is stored in $sort_exit_code.
##
## Dies if 'sort' cannot be executed or if it was terminated by a signal.
##
sub run_sort()
{
    warn "$PROGRAM: Running GNU sort...\n" if $debug;

    system('sort', @sort_options);
    my $status = $?;

    # -1 means the child could not be started at all; $! holds the reason.
    die "$PROGRAM: Error: failed to execute 'sort': $!\n" if $status == -1;

    # The low 7 bits of the wait status carry the terminating signal, if any.
    my $signal = $status & 127;
    if ($signal) {
        ## if sort was interrupted (CTRL-C) - just pass it on and commit suicide
        kill 2, $$ if $signal == 2;
        die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n";
    }

    # Normal termination: the exit code lives in the high byte.
    $sort_exit_code = $status >> 8;
}
+ − 237
+ − 238
##
## Getopt::Long handler for value-less (flag) sort options.
## Only the first argument (the option object/name) is used; any further
## arguments are ignored, so just the option itself is queued for 'sort'.
##
sub add_standard_sort_param(@)
{
    my $opt = $_[0];
    add_standard_sort_param_value($opt, undef);
}
+ − 244
##
## Getopt::Long handler for sort options that take a value.
##
## Arguments:
##   $obj   - the option object/name handed over by Getopt::Long; its string
##            form is used as the option name.
##   $value - the option's argument, or undef when there is none.
##
## Appends the option to @sort_options, spelled "-x" for one-character names
## and "--name" otherwise, followed by the value as a separate element.
##
sub add_standard_sort_param_value(@)
{
    my ($obj,$value)= @_;

    my $option = "" . $obj ; #stringify the option object, get the option name.

    if (length($option)==1) {
        $option = "-" . $option ;
    } else {
        $option = "--" . $option ;
    }
    push @sort_options, $option ;

    # Test definedness, not truth: "0" (e.g. '-t 0' or '--parallel=0') and the
    # empty string are real arguments. Dropping them would leave the option
    # without its value on the sort command line.
    push @sort_options, $value if defined $value;
}
+ − 259
##
## Getopt::Long handler for GNU sort options this wrapper refuses to accept.
## Always dies, naming the offending option (the string form of the first
## argument); any option value is ignored.
##
sub forbidden_sort_param(@)
{
    my $option = "$_[0]";   # stringify the option object to get its name

    die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n";
}
+ − 267
##
## Read one line from STDIN, one byte at a time with sysread().
##
## sysread() bypasses Perl's buffered I/O, so nothing beyond the returned line
## is consumed from the underlying descriptor. The rest of the input stays
## available to whatever reads that descriptor next.
##
## Returns the line including its trailing "\n", or the final unterminated
## fragment if the input ends without a newline, or undef at end of input
## when nothing at all was read.
## Dies when sysread() reports an error.
##
sub read_line_non_buffered()
{
    my $line = '';
    while ( 1 ) {
        my $c;
        my $rc = sysread STDIN, $c, 1;
        die "$PROGRAM: STDIN Read error: $!" unless defined $rc;

        if ($rc==0) {
            # End of input. Decide by length, not truth: a last line that is
            # just "0" (no newline) is false in boolean context but is still
            # data that must be returned.
            return length($line) ? $line : undef;
        }

        $line .= $c ;
        return $line if ( $c eq "\n");
    }
}
+ − 281