Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
diff 2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm Fri May 30 03:37:55 2014 -0400 @@ -0,0 +1,928 @@ +package String::Approx; + +require v5.8.0; + +$VERSION = '3.27'; + +use strict; +local $^W = 1; + +use Carp; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); + +require Exporter; +require DynaLoader; + +@ISA = qw(Exporter DynaLoader); + +@EXPORT_OK = qw(amatch asubstitute aindex aslice arindex + adist adistr adistword adistrword); + +bootstrap String::Approx $VERSION; + +my $CACHE_MAX = 1000; # high water mark +my $CACHE_PURGE = 0.75; # purge this much of the least used +my $CACHE_N_PURGE; # purge this many of the least used + +sub cache_n_purge () { + $CACHE_N_PURGE = $CACHE_MAX * $CACHE_PURGE; + $CACHE_N_PURGE = 1 if $CACHE_N_PURGE < 1; + return $CACHE_N_PURGE; +} + +cache_n_purge(); + +sub cache_max (;$) { + if (@_ == 0) { + return $CACHE_MAX; + } else { + $CACHE_MAX = shift; + } + $CACHE_MAX = 0 if $CACHE_MAX < 0; + cache_n_purge(); +} + +sub cache_purge (;$) { + if (@_ == 0) { + return $CACHE_PURGE; + } else { + $CACHE_PURGE = shift; + } + if ($CACHE_PURGE < 0) { + $CACHE_PURGE = 0; + } elsif ($CACHE_PURGE > 1) { + $CACHE_PURGE = 1; + } + cache_n_purge(); +} + +my %_simple; +my %_simple_usage_count; + +sub _cf_simple { + my $P = shift; + + my @usage = + sort { $_simple_usage_count{$a} <=> $_simple_usage_count{$b} } + grep { $_ ne $P } + keys %_simple_usage_count; + + # Make room, delete the least used entries. + $#usage = $CACHE_N_PURGE - 1; + + delete @_simple_usage_count{@usage}; + delete @_simple{@usage}; +} + +sub _simple { + my $P = shift; + + my $_simple = new(__PACKAGE__, $P); + + if ($CACHE_MAX) { + $_simple{$P} = $_simple unless exists $_simple{$P}; + + $_simple_usage_count{$P}++; + + if (keys %_simple_usage_count > $CACHE_MAX) { + _cf_simple($P); + } + } + + return ( $_simple ); +} + +sub _parse_param { + use integer; + + my ($n, @param) = @_; + my %param; + + foreach (@param) { + while ($_ ne '') { + s/^\s+//; + if (s/^([IDS]\s*)?(\d+)(\s*%)?//) { + my $k = defined $3 ? (($2-1) * $n) / 100 + ($2 ? 1 : 0) : $2; + + if (defined $1) { + $param{$1} = $k; + } else { + $param{k} = $k; + } + } elsif (s/^initial_position\W+(\d+)\b//) { + $param{'initial_position'} = $1; + } elsif (s/^final_position\W+(\d+)\b//) { + $param{'final_position'} = $1; + } elsif (s/^position_range\W+(\d+)\b//) { + $param{'position_range'} = $1; + } elsif (s/^minimal_distance\b//) { + $param{'minimal_distance'} = 1; + } elsif (s/^i//) { + $param{ i } = 1; + } elsif (s/^g//) { + $param{ g } = 1; + } elsif (s/^\?//) { + $param{'?'} = 1; + } else { + warn "unknown parameter: '$_'\n"; + return; + } + } + } + + return %param; +} + +my %_param_key; +my %_parsed_param; + +my %_complex; +my %_complex_usage_count; + +sub _cf_complex { + my $P = shift; + + my @usage = + sort { $_complex_usage_count{$a} <=> + $_complex_usage_count{$b} } + grep { $_ ne $P } + keys %_complex_usage_count; + + # Make room, delete the least used entries. + $#usage = $CACHE_N_PURGE - 1; + + delete @_complex_usage_count{@usage}; + delete @_complex{@usage}; +} + +sub _complex { + my ($P, @param) = @_; + unshift @param, length $P; + my $param = "@param"; + my $_param_key; + my %param; + my $complex; + my $is_new; + + unless (exists $_param_key{$param}) { + %param = _parse_param(@param); + $_parsed_param{$param} = { %param }; + $_param_key{$param} = join(" ", %param); + } else { + %param = %{ $_parsed_param{$param} }; + } + + $_param_key = $_param_key{$param}; + + if ($CACHE_MAX) { + if (exists $_complex{$P}->{$_param_key}) { + $complex = $_complex{$P}->{$_param_key}; + } + } + + unless (defined $complex) { + if (exists $param{'k'}) { + $complex = new(__PACKAGE__, $P, $param{k}); + } else { + $complex = new(__PACKAGE__, $P); + } + $_complex{$P}->{$_param_key} = $complex if $CACHE_MAX; + $is_new = 1; + } + + if ($is_new) { + $complex->set_greedy unless exists $param{'?'}; + + $complex->set_insertions($param{'I'}) + if exists $param{'I'}; + $complex->set_deletions($param{'D'}) + if exists $param{'D'}; + $complex->set_substitutions($param{'S'}) + if exists $param{'S'}; + + $complex->set_caseignore_slice + if exists $param{'i'}; + + $complex->set_text_initial_position($param{'initial_position'}) + if exists $param{'initial_position'}; + + $complex->set_text_final_position($param{'final_position'}) + if exists $param{'final_position'}; + + $complex->set_text_position_range($param{'position_range'}) + if exists $param{'position_range'}; + + $complex->set_minimal_distance($param{'minimal_distance'}) + if exists $param{'minimal_distance'}; + } + + if ($CACHE_MAX) { + $_complex_usage_count{$P}->{$_param_key}++; + + # If our cache overfloweth. + if (scalar keys %_complex_usage_count > $CACHE_MAX) { + _cf_complex($P); + } + } + + return ( $complex, %param ); +} + +sub cache_disable { + cache_max(0); +} + +sub cache_flush_all { + my $old_purge = cache_purge(); + cache_purge(1); + _cf_simple(''); + _cf_complex(''); + cache_purge($old_purge); +} + +sub amatch { + my $P = shift; + return 1 unless length $P; + my $a = ((@_ && ref $_[0] eq 'ARRAY') ? + _complex($P, @{ shift(@_) }) : _simple($P))[0]; + + if (@_) { + if (wantarray) { + return grep { $a->match($_) } @_; + } else { + foreach (@_) { + return 1 if $a->match($_); + } + return 0; + } + } + if (defined $_) { + if (wantarray) { + return $a->match($_) ? $_ : undef; + } else { + return 1 if $a->match($_); + } + } + return $a->match($_) if defined $_; + + warn "amatch: \$_ is undefined: what are you matching?\n"; + return; +} + +sub _find_substitute { + my ($ri, $rs, $i, $s, $S, $rn) = @_; + + push @{ $ri }, $i; + push @{ $rs }, $s; + + my $pre = substr($_, 0, $i); + my $old = substr($_, $i, $s); + my $suf = substr($_, $i + $s); + my $new = $S; + + $new =~ s/\$\`/$pre/g; + $new =~ s/\$\&/$old/g; + $new =~ s/\$\'/$suf/g; + + push @{ $rn }, $new; +} + +sub _do_substitute { + my ($rn, $ri, $rs, $rS) = @_; + + my $d = 0; + my $n = $_; + + foreach my $i (0..$#$rn) { + substr($n, $ri->[$i] + $d, $rs->[$i]) = $rn->[$i]; + $d += length($rn->[$i]) - $rs->[$i]; + } + + push @{ $rS }, $n; +} + +sub asubstitute { + my $P = shift; + my $S = shift; + my ($a, %p) = + (@_ && ref $_[0] eq 'ARRAY') ? + _complex($P, @{ shift(@_) }) : _simple($P); + + my ($i, $s, @i, @s, @n, @S); + + if (@_) { + if (exists $p{ g }) { + foreach (@_) { + @s = @i = @n = (); + while (($i, $s) = $a->slice_next($_)) { + if (defined $i) { + _find_substitute(\@i, \@s, $i, $s, $S, \@n); + } + } + _do_substitute(\@n, \@i, \@s, \@S) if @n; + } + } else { + foreach (@_) { + @s = @i = @n = (); + ($i, $s) = $a->slice($_); + if (defined $i) { + _find_substitute(\@i, \@s, $i, $s, $S, \@n); + _do_substitute(\@n, \@i, \@s, \@S); + } + } + } + return @S; + } elsif (defined $_) { + if (exists $p{ g }) { + while (($i, $s) = $a->slice_next($_)) { + if (defined $i) { + _find_substitute(\@i, \@s, $i, $s, $S, \@n); + } + } + _do_substitute(\@n, \@i, \@s, \@S) if @n; + } else { + ($i, $s) = $a->slice($_); + if (defined $i) { + _find_substitute(\@i, \@s, $i, $s, $S, \@n); + _do_substitute(\@n, \@i, \@s, \@S); + } + } + return $_ = $n[0]; + } else { + warn "asubstitute: \$_ is undefined: what are you substituting?\n"; + return; + } +} + +sub aindex { + my $P = shift; + return 0 unless length $P; + my $a = ((@_ && ref $_[0] eq 'ARRAY') ? + _complex($P, @{ shift(@_) }) : _simple($P))[0]; + + $a->set_greedy; # The *first* match, thank you. + + if (@_) { + if (wantarray) { + return map { $a->index($_) } @_; + } else { + return $a->index($_[0]); + } + } + return $a->index($_) if defined $_; + + warn "aindex: \$_ is undefined: what are you indexing?\n"; + return; +} + +sub aslice { + my $P = shift; + return (0, 0) unless length $P; + my $a = ((@_ && ref $_[0] eq 'ARRAY') ? + _complex($P, @{ shift(@_) }) : _simple($P))[0]; + + $a->set_greedy; # The *first* match, thank you. + + if (@_) { + return map { [ $a->slice($_) ] } @_; + } + return $a->slice($_) if defined $_; + + warn "aslice: \$_ is undefined: what are you slicing?\n"; + return; +} + +sub _adist { + my $s0 = shift; + my $s1 = shift; + my ($aslice) = aslice($s0, ['minimal_distance', @_], $s1); + my ($index, $size, $distance) = @$aslice; + my ($l0, $l1) = map { length } ($s0, $s1); + return $l0 <= $l1 ? $distance : -$distance; +} + +sub adist { + my $a0 = shift; + my $a1 = shift; + if (length($a0) == 0) { + return length($a1); + } + if (length($a1) == 0) { + return length($a0); + } + my @m = ref $_[0] eq 'ARRAY' ? @{shift()} : (); + if (ref $a0 eq 'ARRAY') { + if (ref $a1 eq 'ARRAY') { + return [ map { adist($a0, $_, @m) } @{$a1} ]; + } else { + return [ map { _adist($_, $a1, @m) } @{$a0} ]; + } + } elsif (ref $a1 eq 'ARRAY') { + return [ map { _adist($a0, $_, @m) } @{$a1} ]; + } else { + if (wantarray) { + return map { _adist($a0, $_, @m) } ($a1, @_); + } else { + return _adist($a0, $a1, @m); + } + } +} + +sub adistr { + my $a0 = shift; + my $a1 = shift; + my @m = ref $_[0] eq 'ARRAY' ? shift : (); + if (ref $a0 eq 'ARRAY') { + if (ref $a1 eq 'ARRAY') { + my $l0 = length(); + return $l0 ? [ map { adist($a0, $_, @m) } + @{$a1} ] : + [ ]; + } else { + return [ map { my $l0 = length(); + $l0 ? _adist($_, $a1, @m) / $l0 : undef + } @{$a0} ]; + } + } elsif (ref $a1 eq 'ARRAY') { + my $l0 = length($a0); + return [] unless $l0; + return [ map { _adist($a0, $_, @m) / $l0 } @{$a1} ]; + } else { + my $l0 = length($a0); + if (wantarray) { + return map { $l0 ? _adist($a0, $_, @m) / $l0 : undef } ($a1, @_); + } else { + return undef unless $l0; + return _adist($a0, $a1, @m) / $l0; + } + } +} + +sub adistword { + return adist($_[0], $_[1], ['position_range=0']); +} + +sub adistrword { + return adistr($_[0], $_[1], ['position_range=0']); +} + +sub arindex { + my $P = shift; + my $l = length $P; + return 0 unless $l; + my $R = reverse $P; + my $a = ((@_ && ref $_[0] eq 'ARRAY') ? + _complex($R, @{ shift(@_) }) : _simple($R))[0]; + + $a->set_greedy; # The *first* match, thank you. + + if (@_) { + if (wantarray) { + return map { + my $aindex = $a->index(scalar reverse()); + $aindex == -1 ? $aindex : (length($_) - $aindex - $l); + } @_; + } else { + my $aindex = $a->index(scalar reverse $_[0]); + return $aindex == -1 ? $aindex : (length($_[0]) - $aindex - $l); + } + } + if (defined $_) { + my $aindex = $a->index(scalar reverse()); + return $aindex == -1 ? $aindex : (length($_) - $aindex - $l); + } + + warn "arindex: \$_ is undefined: what are you indexing?\n"; + return; +} + +1; +__END__ +=pod + +=head1 NAME + +String::Approx - Perl extension for approximate matching (fuzzy matching) + +=head1 SYNOPSIS + + use String::Approx 'amatch'; + + print if amatch("foobar"); + + my @matches = amatch("xyzzy", @inputs); + + my @catches = amatch("plugh", ['2'], @inputs); + +=head1 DESCRIPTION + +String::Approx lets you match and substitute strings approximately. +With this you can emulate errors: typing errorrs, speling errors, +closely related vocabularies (colour color), genetic mutations (GAG +ACT), abbreviations (McScot, MacScot). + +NOTE: String::Approx suits the task of B<string matching>, not +B<string comparison>, and it works for B<strings>, not for B<text>. + +If you want to compare strings for similarity, you probably just want +the Levenshtein edit distance (explained below), the Text::Levenshtein +and Text::LevenshteinXS modules in CPAN. See also Text::WagnerFischer +and Text::PhraseDistance. (There are functions for this in String::Approx, +e.g. adist(), but their results sometimes differ from the bare Levenshtein +et al.) + +If you want to compare things like text or source code, consisting of +B<words> or B<tokens> and B<phrases> and B<sentences>, or +B<expressions> and B<statements>, you should probably use some other +tool than String::Approx, like for example the standard UNIX diff(1) +tool, or the Algorithm::Diff module from CPAN. + +The measure of B<approximateness> is the I<Levenshtein edit distance>. +It is the total number of "edits": insertions, + + word world + +deletions, + + monkey money + +and substitutions + + sun fun + +required to transform a string to another string. For example, to +transform I<"lead"> into I<"gold">, you need three edits: + + lead gead goad gold + +The edit distance of "lead" and "gold" is therefore three, or 75%. + +B<String::Approx> uses the Levenshtein edit distance as its measure, but +String::Approx is not well-suited for comparing strings of different +length, in other words, if you want a "fuzzy eq", see above. +String::Approx is more like regular expressions or index(), it finds +substrings that are close matches.> + +=head1 MATCH + + use String::Approx 'amatch'; + + $matched = amatch("pattern") + $matched = amatch("pattern", [ modifiers ]) + + $any_matched = amatch("pattern", @inputs) + $any_matched = amatch("pattern", [ modifiers ], @inputs) + + @match = amatch("pattern") + @match = amatch("pattern", [ modifiers ]) + + @matches = amatch("pattern", @inputs) + @matches = amatch("pattern", [ modifiers ], @inputs) + +Match B<pattern> approximately. In list context return the matched +B<@inputs>. If no inputs are given, match against the B<$_>. In scalar +context return true if I<any> of the inputs match, false if none match. + +Notice that the pattern is a string. Not a regular expression. None +of the regular expression notations (^, ., *, and so on) work. They +are characters just like the others. Note-on-note: some limited form +of I<"regular expressionism"> is planned in future: for example +character classes ([abc]) and I<any-chars> (.). But that feature will +be turned on by a special I<modifier> (just a guess: "r"), so there +should be no backward compatibility problem. + +Notice also that matching is not symmetric. The inputs are matched +against the pattern, not the other way round. In other words: the +pattern can be a substring, a submatch, of an input element. An input +element is always a superstring of the pattern. + +=head2 MODIFIERS + +With the modifiers you can control the amount of approximateness and +certain other control variables. The modifiers are one or more +strings, for example B<"i">, within a string optionally separated by +whitespace. The modifiers are inside an anonymous array: the B<[ ]> +in the syntax are not notational, they really do mean B<[ ]>, for +example B<[ "i", "2" ]>. B<["2 i"]> would be identical. + +The implicit default approximateness is 10%, rounded up. In other +words: every tenth character in the pattern may be an error, an edit. +You can explicitly set the maximum approximateness by supplying a +modifier like + + number + number% + +Examples: B<"3">, B<"15%">. + +Note that C<0%> is not rounded up, it is equal to C<0>. + +Using a similar syntax you can separately control the maximum number +of insertions, deletions, and substitutions by prefixing the numbers +with I, D, or S, like this: + + Inumber + Inumber% + Dnumber + Dnumber% + Snumber + Snumber% + +Examples: B<"I2">, B<"D20%">, B<"S0">. + +You can ignore case (B<"A"> becames equal to B<"a"> and vice versa) +by adding the B<"i"> modifier. + +For example + + [ "i 25%", "S0" ] + +means I<ignore case>, I<allow every fourth character to be "an edit">, +but allow I<no substitutions>. (See L<NOTES> about disallowing +substitutions or insertions.) + +NOTE: setting C<I0 D0 S0> is not equivalent to using index(). +If you want to use index(), use index(). + +=head1 SUBSTITUTE + + use String::Approx 'asubstitute'; + + @substituted = asubstitute("pattern", "replacement") + @substituted = asubstitute("pattern", "replacement", @inputs) + @substituted = asubstitute("pattern", "replacement", [ modifiers ]) + @substituted = asubstitute("pattern", "replacement", + [ modifiers ], @inputs) + +Substitute approximate B<pattern> with B<replacement> and return as a +list <copies> of B<@inputs>, the substitutions having been made on the +elements that did match the pattern. If no inputs are given, +substitute in the B<$_>. The replacement can contain magic strings +B<$&>, B<$`>, B<$'> that stand for the matched string, the string +before it, and the string after it, respectively. All the other +arguments are as in C<amatch()>, plus one additional modifier, B<"g"> +which means substitute globally (all the matches in an element and not +just the first one, as is the default). + +See L<BAD NEWS> about the unfortunate stinginess of C<asubstitute()>. + +=head1 INDEX + + use String::Approx 'aindex'; + + $index = aindex("pattern") + @indices = aindex("pattern", @inputs) + $index = aindex("pattern", [ modifiers ]) + @indices = aindex("pattern", [ modifiers ], @inputs) + +Like C<amatch()> but returns the index/indices at which the pattern +matches approximately. In list context and if C<@inputs> are used, +returns a list of indices, one index for each input element. +If there's no approximate match, C<-1> is returned as the index. + +NOTE: if there is character repetition (e.g. "aa") either in +the pattern or in the text, the returned index might start +"too early". This is consistent with the goal of the module +of matching "as early as possible", just like regular expressions +(that there might be a "less approximate" match starting later is +of somewhat irrelevant). + +There's also backwards-scanning C<arindex()>. + +=head1 SLICE + + use String::Approx 'aslice'; + + ($index, $size) = aslice("pattern") + ([$i0, $s0], ...) = aslice("pattern", @inputs) + ($index, $size) = aslice("pattern", [ modifiers ]) + ([$i0, $s0], ...) = aslice("pattern", [ modifiers ], @inputs) + +Like C<aindex()> but returns also the size (length) of the match. +If the match fails, returns an empty list (when matching against C<$_>) +or an empty anonymous list corresponding to the particular input. + +NOTE: size of the match will very probably be something you did not +expect (such as longer than the pattern, or a negative number). This +may or may not be fixed in future releases. Also the beginning of the +match may vary from the expected as with aindex(), see above. + +If the modifier + + "minimal_distance" + +is used, the minimal possible edit distance is returned as the +third element: + + ($index, $size, $distance) = aslice("pattern", [ modifiers ]) + ([$i0, $s0, $d0], ...) = aslice("pattern", [ modifiers ], @inputs) + +=head1 DISTANCE + + use String::Approx 'adist'; + + $dist = adist("pattern", $input); + @dist = adist("pattern", @input); + +Return the I<edit distance> or distances between the pattern and the +input or inputs. Zero edit distance means exact match. (Remember +that the match can 'float' in the inputs, the match is a substring +match.) If the pattern is longer than the input or inputs, the +returned distance or distances is or are negative. + + use String::Approx 'adistr'; + + $dist = adistr("pattern", $input); + @dist = adistr("pattern", @inputs); + +Return the B<relative> I<edit distance> or distances between the +pattern and the input or inputs. Zero relative edit distance means +exact match, one means completely different. (Remember that the +match can 'float' in the inputs, the match is a substring match.) If +the pattern is longer than the input or inputs, the returned distance +or distances is or are negative. + +You can use adist() or adistr() to sort the inputs according to their +approximateness: + + my %d; + @d{@inputs} = map { abs } adistr("pattern", @inputs); + my @d = sort { $d{$a} <=> $d{$b} } @inputs; + +Now C<@d> contains the inputs, the most like C<"pattern"> first. + +=head1 CONTROLLING THE CACHE + +C<String::Approx> maintains a LU (least-used) cache that holds the +'matching engines' for each instance of a I<pattern+modifiers>. The +cache is intended to help the case where you match a small set of +patterns against a large set of string. However, the more engines you +cache the more you eat memory. If you have a lot of different +patterns or if you have a lot of memory to burn, you may want to +control the cache yourself. For example, allowing a larger cache +consumes more memory but probably runs a little bit faster since the +cache fills (and needs flushing) less often. + +The cache has two parameters: I<max> and I<purge>. The first one +is the maximum size of the cache and the second one is the cache +flushing ratio: when the number of cache entries exceeds I<max>, +I<max> times I<purge> cache entries are flushed. The default +values are 1000 and 0.75, respectively, which means that when +the 1001st entry would be cached, 750 least used entries will +be removed from the cache. To access the parameters you can +use the calls + + $now_max = String::Approx::cache_max(); + String::Approx::cache_max($new_max); + + $now_purge = String::Approx::cache_purge(); + String::Approx::cache_purge($new_purge); + + $limit = String::Approx::cache_n_purge(); + +To be honest, there are actually B<two> caches: the first one is used +far the patterns with no modifiers, the second one for the patterns +with pattern modifiers. Using the standard parameters you will +therefore actually cache up to 2000 entries. The above calls control +both caches for the same price. + +To disable caching completely use + + String::Approx::cache_disable(); + +Note that this doesn't flush any possibly existing cache entries, +to do that use + + String::Approx::cache_flush_all(); + +=head1 NOTES + +Because matching is by I<substrings>, not by whole strings, insertions +and substitutions produce often very similar results: "abcde" matches +"axbcde" either by insertion B<or> substitution of "x". + +The maximum edit distance is also the maximum number of edits. +That is, the B<"I2"> in + + amatch("abcd", ["I2"]) + +is useless because the maximum edit distance is (implicitly) 1. +You may have meant to say + + amatch("abcd", ["2D1S1"]) + +or something like that. + +If you want to simulate transposes + + feet fete + +you need to allow at least edit distance of two because in terms of +our edit primitives a transpose is first one deletion and then one +insertion. + +=head2 TEXT POSITION + +The starting and ending positions of matching, substituting, indexing, or +slicing can be changed from the beginning and end of the input(s) to +some other positions by using either or both of the modifiers + + "initial_position=24" + "final_position=42" + +or the both the modifiers + + "initial_position=24" + "position_range=10" + +By setting the B<"position_range"> to be zero you can limit +(anchor) the operation to happen only once (if a match is possible) +at the position. + +=head1 VERSION + +Major release 3. + +=head1 CHANGES FROM VERSION 2 + +=head2 GOOD NEWS + +=over 4 + +=item The version 3 is 2-3 times faster than version 2 + +=item No pattern length limitation + +The algorithm is independent on the pattern length: its time +complexity is I<O(kn)>, where I<k> is the number of edits and I<n> the +length of the text (input). The preprocessing of the pattern will of +course take some I<O(m)> (I<m> being the pattern length) time, but +C<amatch()> and C<asubstitute()> cache the result of this +preprocessing so that it is done only once per pattern. + +=back + +=head2 BAD NEWS + +=over 4 + +=item You do need a C compiler to install the module + +Perl's regular expressions are no more used; instead a faster and more +scalable algorithm written in C is used. + +=item C<asubstitute()> is now always stingy + +The string matched and substituted is now always stingy, as short +as possible. It used to be as long as possible. This is an unfortunate +change stemming from switching the matching algorithm. Example: with +edit distance of two and substituting for B<"word"> from B<"cork"> and +B<"wool"> previously did match B<"cork"> and B<"wool">. Now it does +match B<"or"> and B<"wo">. As little as possible, or, in other words, +with as much approximateness, as many edits, as possible. Because +there is no I<need> to match the B<"c"> of B<"cork">, it is not matched. + +=item no more C<aregex()> because regular expressions are no more used + +=item no more C<compat1> for String::Approx version 1 compatibility + +=back + +=head1 ACKNOWLEDGEMENTS + +The following people have provided valuable test cases, documentation +clarifications, and other feedback: + +Jared August, Arthur Bergman, Anirvan Chatterjee, Steve A. Chervitz, +Aldo Calpini, David Curiel, Teun van den Dool, Alberto Fontaneda, +Rob Fugina, Dmitrij Frishman, Lars Gregersen, Kevin Greiner, +B. Elijah Griffin, Mike Hanafey, Mitch Helle, Ricky Houghton, +'idallen', Helmut Jarausch, Damian Keefe, Ben Kennedy, Craig Kelley, +Franz Kirsch, Dag Kristian, Mark Land, J. D. Laub, John P. Linderman, +Tim Maher, Juha Muilu, Sergey Novoselov, Andy Oram, Ji Y Park, +Eric Promislow, Nikolaus Rath, Stefan Ram, Slaven Rezic, +Dag Kristian Rognlien, Stewart Russell, Slaven Rezic, Chris Rosin, +Pasha Sadri, Ilya Sandler, Bob J.A. Schijvenaars, Ross Smith, +Frank Tobin, Greg Ward, Rich Williams, Rick Wise. + +The matching algorithm was developed by Udi Manber, Sun Wu, and Burra +Gopal in the Department of Computer Science, University of Arizona. + +=head1 AUTHOR + +Jarkko Hietaniemi <jhi@iki.fi> + +=head1 COPYRIGHT AND LICENSE + +Copyright 2001-2013 by Jarkko Hietaniemi + +This library is free software; you can redistribute it and/or modify +under either the terms of the Artistic License 2.0, or the GNU Library +General Public License, Version 2. See the files Artistic and LGPL +for more details. + +Furthermore: no warranties or obligations of any kind are given, and +the separate file F<COPYRIGHT> must be included intact in all copies +and derived materials. + +=cut