#!/usr/bin/perl

use strict;
use warnings;

# Condenses every run of consecutive characters of one type into a single
# occurrence of that character, line by line.
#
# Usage: convert_characters.pl [input] [character] [output]
#
#   [input]      path of the file to read
#   [character]  code selecting which character runs to condense:
#                  s   any whitespace (each run becomes one space)
#                  T   tab
#                  Sp  space
#                  Dt  dot
#                  C   comma
#                  D   dash
#                  U   underscore
#                  P   pipe
#   [output]     path of the file to write (truncated if it exists)
#
# Every output line is terminated with "\n", including the last one.

die "Check arguments" unless @ARGV == 3;

my ($inputfile, $character, $outputfile) = @ARGV;

# One table drives both the pattern to search for and its replacement, so a
# code can never have a pattern without a matching replacement.
my %condense_rule_for = (
    s  => { from => qr/\s+/, to => ' '  },
    T  => { from => qr/\t+/, to => "\t" },
    Sp => { from => qr/ +/,  to => ' '  },
    Dt => { from => qr/\.+/, to => '.'  },
    C  => { from => qr/,+/,  to => ','  },
    D  => { from => qr/-+/,  to => '-'  },
    U  => { from => qr/_+/,  to => '_'  },
    P  => { from => qr/\|+/, to => '|'  },
);

my $rule = $condense_rule_for{$character}
    or die "Invalid value specified for convert from\n";

my $convert_from = $rule->{from};
my $convert_to   = $rule->{to};

# Three-argument open keeps the mode separate from the file name, so names
# with leading/trailing whitespace or mode characters are handled literally.
open my $fh_in, '<', $inputfile
    or die "Cannot open source file '$inputfile': $!\n";

open my $fh_out, '>', $outputfile
    or die "Cannot open output file '$outputfile': $!\n";

while (my $line = <$fh_in>) {
    chomp $line;
    $line =~ s/$convert_from/$convert_to/g;
    print {$fh_out} $line, "\n";
}

close $fh_in  or die "Cannot close source file '$inputfile': $!\n";

# Buffered write errors surface at close, so this check matters.
close $fh_out or die "Cannot close output file '$outputfile': $!\n";