softsearch: 2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm annotate

annotate 2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm @ 18:1163c16cb3c0 draft

Uploaded

author	plus91-technologies-pvt-ltd
date	Mon, 02 Jun 2014 07:35:53 -0400
parents	e3609c8714fb
children

rev	line source
13 e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	1 package String::Approx;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	2
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	3 require v5.8.0;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	4
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	5 $VERSION = '3.27';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	6
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	7 use strict;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	8 local $^W = 1;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	9
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	10 use Carp;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	11 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	12
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	13 require Exporter;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	14 require DynaLoader;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	15
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	16 @ISA = qw(Exporter DynaLoader);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	17
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	18 @EXPORT_OK = qw(amatch asubstitute aindex aslice arindex
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	19 adist adistr adistword adistrword);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	20
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	21 bootstrap String::Approx $VERSION;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	22
# Cache tuning knobs (file-scoped).
my $CACHE_MAX   = 1000;    # high water mark: purge once more entries than this
my $CACHE_PURGE = 0.75;    # fraction of the high water mark to purge
my $CACHE_N_PURGE;         # derived: number of least-used entries to purge

# Recompute $CACHE_N_PURGE from $CACHE_MAX and $CACHE_PURGE.
# The result is never allowed to drop below 1.
# Returns the newly computed value.
sub cache_n_purge () {
    my $count = $CACHE_MAX * $CACHE_PURGE;
    $CACHE_N_PURGE = $count < 1 ? 1 : $count;
    return $CACHE_N_PURGE;
}

# Establish the initial purge count at load time.
cache_n_purge();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	34
# Get or set the cache high water mark.
#
# With no argument: returns the current $CACHE_MAX.
# With an argument: stores it (negative values are clamped to 0),
# recomputes the purge count, and returns what cache_n_purge() returns.
sub cache_max (;$) {
    return $CACHE_MAX unless @_;

    my $requested = shift;
    $CACHE_MAX = $requested < 0 ? 0 : $requested;

    return cache_n_purge();
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	44
# Get or set the purge fraction.
#
# With no argument: returns the current $CACHE_PURGE.
# With an argument: stores it clamped into the range [0, 1],
# recomputes the purge count, and returns what cache_n_purge() returns.
sub cache_purge (;$) {
    return $CACHE_PURGE unless @_;

    my $fraction = shift;
    $CACHE_PURGE = $fraction < 0 ? 0
                 : $fraction > 1 ? 1
                 :                 $fraction;

    return cache_n_purge();
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	58
# Cache of matcher objects built by _simple(), keyed by pattern, and the
# number of times each pattern has been requested.
my %_simple;
my %_simple_usage_count;

# Purge the least-used entries from the simple cache.
#
# $P is a pattern that must survive the purge (the entry that was just
# used).  At most int($CACHE_N_PURGE) of the remaining entries are removed,
# starting with the lowest usage counts.
sub _cf_simple {
    my $P = shift;

    my @usage =
        sort { $_simple_usage_count{$a} <=> $_simple_usage_count{$b} }
        grep { $_ ne $P }
        keys %_simple_usage_count;

    # Make room, delete the least used entries.  Only ever shorten the
    # candidate list: assigning a larger value to $#usage would pad it with
    # undef, and the delete slices below would then operate on undef keys.
    my $n_purge = int $CACHE_N_PURGE;
    $#usage = $n_purge - 1 if @usage > $n_purge;

    delete @_simple_usage_count{@usage};
    delete @_simple{@usage};
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	76
# Build a matcher for pattern $P using default settings and record the
# request in the simple cache (when caching is enabled).
#
# Returns a one-element list holding the freshly constructed matcher.
#
# NOTE(review): a new object is constructed on every call and the object
# stored in %_simple is never handed back, so the cache only drives the
# usage accounting here.  Confirm whether that is intentional before
# changing it.
sub _simple {
    my $P = shift;

    my $matcher = new(__PACKAGE__, $P);

    # Caching disabled: nothing to record.
    return ( $matcher ) unless $CACHE_MAX;

    $_simple{$P} = $matcher if !exists $_simple{$P};
    $_simple_usage_count{$P}++;

    # Purge once the cache has grown past the high water mark, keeping $P.
    _cf_simple($P) if keys %_simple_usage_count > $CACHE_MAX;

    return ( $matcher );
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	94
# Parse modifier strings into a parameter hash.
#
# Arguments:
#   $n     - the pattern length, used to scale percentages
#   @param - modifier strings; each may hold several modifiers
#
# Recognised modifiers:
#   <digits>[%]            -> k   (overall edit allowance)
#   I|D|S <digits>[%]      -> I / D / S
#   initial_position N, final_position N, position_range N
#   minimal_distance, i, g, ?
# Whitespace between an I/D/S letter and its number, and before '%', is
# optional.  Percentages are converted with integer arithmetic.
#
# Returns the parameter hash as a flat list.  On an unrecognised modifier
# a warning is emitted and an empty list is returned.
sub _parse_param {
    use integer;

    my ($n, @param) = @_;
    my %param;

    foreach (@param) {
        while ($_ ne '') {
            s/^\s+//;

            # Nothing but whitespace was left: this string is done.
            last if $_ eq '';

            if (s/^(?:([IDS])\s*)?(\d+)(\s*%)?//) {
                # Copy the captures immediately; only the bare letter is
                # captured so that it can be used directly as the hash key.
                my ($kind, $value, $percent) = ($1, $2, $3);

                my $k = defined $percent
                    ? (($value - 1) * $n) / 100 + ($value ? 1 : 0)
                    : $value;

                if (defined $kind) {
                    $param{$kind} = $k;
                } else {
                    $param{k} = $k;
                }
            } elsif (s/^initial_position\W+(\d+)\b//) {
                $param{'initial_position'} = $1;
            } elsif (s/^final_position\W+(\d+)\b//) {
                $param{'final_position'} = $1;
            } elsif (s/^position_range\W+(\d+)\b//) {
                $param{'position_range'} = $1;
            } elsif (s/^minimal_distance\b//) {
                $param{'minimal_distance'} = 1;
            } elsif (s/^i//) {
                $param{ i } = 1;
            } elsif (s/^g//) {
                $param{ g } = 1;
            } elsif (s/^\?//) {
                $param{'?'} = 1;
            } else {
                warn "unknown parameter: '$_'\n";
                return;
            }
        }
    }

    return %param;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	135
# Memo of raw modifier strings: the canonical key derived from each one,
# and the parsed parameter hash.
my %_param_key;
my %_parsed_param;

# Cache of matcher objects built by _complex() and their usage counts.
# Both are two-level hashes: pattern -> canonical parameter key -> value.
my %_complex;
my %_complex_usage_count;

# Purge the least-used patterns from the complex cache.
#
# $P is a pattern that must survive the purge (the entry that was just
# used).  At most int($CACHE_N_PURGE) of the remaining patterns are removed,
# starting with the lowest total usage.
sub _cf_complex {
    my $P = shift;

    # Each value in %_complex_usage_count is a hash reference of per-key
    # counters, so rank patterns by the sum of those counters rather than
    # by comparing the references themselves.
    my %total_usage;
    foreach my $pattern (grep { $_ ne $P } keys %_complex_usage_count) {
        my $total = 0;
        $total += $_ for values %{ $_complex_usage_count{$pattern} || {} };
        $total_usage{$pattern} = $total;
    }

    my @usage =
        sort { $total_usage{$a} <=> $total_usage{$b} }
        keys %total_usage;

    # Make room, delete the least used entries.  Only ever shorten the
    # candidate list: assigning a larger value to $#usage would pad it with
    # undef, and the delete slices below would then operate on undef keys.
    my $n_purge = int $CACHE_N_PURGE;
    $#usage = $n_purge - 1 if @usage > $n_purge;

    delete @_complex_usage_count{@usage};
    delete @_complex{@usage};
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	157
# Build (or fetch from the cache) a matcher for pattern $P configured by
# the given modifier strings.
#
# Arguments:
#   $P     - the pattern
#   @param - modifier strings, as accepted by _parse_param()
#
# Returns a flat list: the matcher object followed by the parsed
# parameter hash.
sub _complex {
    my ($P, @param) = @_;

    # _parse_param() expects the pattern length as its first argument; it
    # is also part of the memo key, since percentages depend on it.
    unshift @param, length $P;
    my $raw_key = "@param";

    # Parse every distinct modifier string only once.
    if (!exists $_param_key{$raw_key}) {
        my %fresh = _parse_param(@param);
        $_parsed_param{$raw_key} = { %fresh };
        $_param_key{$raw_key}    = join(" ", %fresh);
    }

    my %param     = %{ $_parsed_param{$raw_key} };
    my $param_key = $_param_key{$raw_key};

    # Try the cache first.
    my $complex;
    if ($CACHE_MAX && exists $_complex{$P}->{$param_key}) {
        $complex = $_complex{$P}->{$param_key};
    }

    # Cache miss (or caching disabled): construct and configure a matcher.
    if (!defined $complex) {
        $complex = exists $param{'k'}
            ? new(__PACKAGE__, $P, $param{k})
            : new(__PACKAGE__, $P);

        $_complex{$P}->{$param_key} = $complex if $CACHE_MAX;

        # Greedy unless the '?' modifier was given.
        $complex->set_greedy unless exists $param{'?'};

        # Per-operation allowances.
        $complex->set_insertions($param{'I'})    if exists $param{'I'};
        $complex->set_deletions($param{'D'})     if exists $param{'D'};
        $complex->set_substitutions($param{'S'}) if exists $param{'S'};

        $complex->set_caseignore_slice if exists $param{'i'};

        # Position constraints on the text.
        $complex->set_text_initial_position($param{'initial_position'})
            if exists $param{'initial_position'};
        $complex->set_text_final_position($param{'final_position'})
            if exists $param{'final_position'};
        $complex->set_text_position_range($param{'position_range'})
            if exists $param{'position_range'};

        $complex->set_minimal_distance($param{'minimal_distance'})
            if exists $param{'minimal_distance'};
    }

    if ($CACHE_MAX) {
        $_complex_usage_count{$P}->{$param_key}++;

        # Purge once the cache has grown past the high water mark.
        _cf_complex($P) if keys(%_complex_usage_count) > $CACHE_MAX;
    }

    return ( $complex, %param );
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	230
# Turn caching off by setting the high water mark to zero.
# Returns whatever cache_max(0) returns.
sub cache_disable {
    return cache_max(0);
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	234
# Discard every cached matcher and every usage counter.
#
# The caches are emptied directly instead of going through the purge
# helpers: those remove at most $CACHE_N_PURGE entries, which is bounded
# by the current high water mark, so a cache that was filled before the
# limit was lowered (or before caching was disabled) would otherwise
# survive a "flush all" almost untouched.
#
# $CACHE_MAX and $CACHE_PURGE are left unchanged.  Returns the result of
# cache_n_purge(), which is what callers received previously.
sub cache_flush_all {
    %_simple              = ();
    %_simple_usage_count  = ();
    %_complex             = ();
    %_complex_usage_count = ();

    return cache_n_purge();
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	242
# Approximate match.
#
# Usage:
#   amatch($pattern)                      - match against $_
#   amatch($pattern, @texts)              - match against the given texts
#   amatch($pattern, [ @modifiers ], ...) - as above, with modifiers
#
# Returns:
#   * 1 immediately when the pattern is empty;
#   * with texts: in list context the texts that matched, in scalar
#     context 1 if any text matched and 0 otherwise;
#   * without texts: in list context $_ on a match and undef otherwise,
#     in scalar context 1 on a match and the matcher's false value
#     otherwise;
#   * nothing, after a warning, when no texts are given and $_ is
#     undefined.
sub amatch {
    my $P = shift;
    return 1 unless length $P;

    # An array reference right after the pattern holds the modifiers.
    my $matcher = ((@_ && ref $_[0] eq 'ARRAY')
                   ? _complex($P, @{ shift(@_) })
                   : _simple($P))[0];

    if (@_) {
        return grep { $matcher->match($_) } @_ if wantarray;

        foreach (@_) {
            return 1 if $matcher->match($_);
        }
        return 0;
    }

    unless (defined $_) {
        warn "amatch: \$_ is undefined: what are you matching?\n";
        return;
    }

    # Run the match exactly once and shape the result for the context.
    my $matched = $matcher->match($_);
    return ($matched ? $_ : undef) if wantarray;
    return $matched ? 1 : $matched;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	271
# Record one match in $_ and compute its replacement text.
#
# Arguments:
#   $ri - array ref collecting match offsets
#   $rs - array ref collecting match lengths
#   $i  - offset of this match in $_
#   $s  - length of this match
#   $S  - replacement template; may contain $`, $& and $'
#   $rn - array ref collecting the expanded replacement strings
#
# In the template, $` stands for the text before the match, $& for the
# matched text and $' for the text after the match.
sub _find_substitute {
    my ($ri, $rs, $i, $s, $S, $rn) = @_;

    push @{ $ri }, $i;
    push @{ $rs }, $s;

    my %text_for = (
        '`' => substr($_, 0, $i),      # before the match
        '&' => substr($_, $i, $s),     # the match itself
        "'" => substr($_, $i + $s),    # after the match
    );

    # Expand all placeholders in a single pass, so that text which has
    # just been inserted is never re-scanned for further placeholders.
    my $new = $S;
    $new =~ s/\$([`&'])/$text_for{$1}/g;

    push @{ $rn }, $new;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	289
# Apply previously computed replacements to a copy of $_.
#
# Arguments:
#   $rn - array ref of replacement strings
#   $ri - array ref of match offsets in the original $_
#   $rs - array ref of match lengths
#   $rS - array ref onto which the resulting string is pushed
#
# Replacements are applied in array order; a running offset compensates
# for length changes caused by the earlier ones.
sub _do_substitute {
    my ($rn, $ri, $rs, $rS) = @_;

    my $result = $_;
    my $drift  = 0;    # accumulated length difference so far

    for (my $k = 0; $k <= $#{ $rn }; $k++) {
        my ($offset, $length, $text) = ($ri->[$k], $rs->[$k], $rn->[$k]);

        substr($result, $offset + $drift, $length, $text);
        $drift += length($text) - $length;
    }

    push @{ $rS }, $result;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	303
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	304 sub asubstitute {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	305 my $P = shift;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	306 my $S = shift;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	307 my ($a, %p) =
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	308 (@_ && ref $_[0] eq 'ARRAY') ?
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	309 _complex($P, @{ shift(@_) }) : _simple($P);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	310
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	311 my ($i, $s, @i, @s, @n, @S);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	312
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	313 if (@_) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	314 if (exists $p{ g }) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	315 foreach (@_) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	316 @s = @i = @n = ();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	317 while (($i, $s) = $a->slice_next($_)) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	318 if (defined $i) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	319 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	320 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	321 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	322 _do_substitute(\@n, \@i, \@s, \@S) if @n;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	323 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	324 } else {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	325 foreach (@_) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	326 @s = @i = @n = ();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	327 ($i, $s) = $a->slice($_);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	328 if (defined $i) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	329 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	330 _do_substitute(\@n, \@i, \@s, \@S);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	331 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	332 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	333 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	334 return @S;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	335 } elsif (defined $_) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	336 if (exists $p{ g }) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	337 while (($i, $s) = $a->slice_next($_)) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	338 if (defined $i) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	339 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	340 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	341 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	342 _do_substitute(\@n, \@i, \@s, \@S) if @n;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	343 } else {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	344 ($i, $s) = $a->slice($_);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	345 if (defined $i) {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	346 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	347 _do_substitute(\@n, \@i, \@s, \@S);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	348 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	349 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	350 return $_ = $n[0];
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	351 } else {
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	352 warn "asubstitute: \$_ is undefined: what are you substituting?\n";
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	353 return;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	354 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	355 }
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	356
# aindex(PATTERN [, \@modifiers] [, @inputs])
#
# Approximate counterpart of index().  An empty PATTERN yields 0 right
# away.  When the first remaining argument is an array reference its
# contents are handed to _complex() together with the pattern, otherwise
# _simple() is used; in both cases the first value returned is the
# matcher object.
#
# With inputs: list context returns one index per input, scalar context
# returns the index for the first input only.  Without inputs the
# current $_ is searched; if $_ is undefined a warning is issued and
# nothing is returned.
sub aindex {
    my $pattern = shift;
    length $pattern or return 0;

    my ($matcher) = (@_ && ref $_[0] eq 'ARRAY')
        ? _complex($pattern, @{ shift @_ })
        : _simple($pattern);

    $matcher->set_greedy; # The first match, thank you.

    unless (@_) {
        return $matcher->index($_) if defined $_;

        warn "aindex: \$_ is undefined: what are you indexing?\n";
        return;
    }

    return map { $matcher->index($_) } @_ if wantarray;
    return $matcher->index($_[0]);
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	377
# aslice(PATTERN [, \@modifiers] [, @inputs])
#
# Like aindex() but based on the matcher's slice() method.  An empty
# PATTERN yields (0, 0).  The matcher comes from _complex() when the
# first remaining argument is an array reference, otherwise from
# _simple().
#
# With inputs, one array reference per input is returned, each holding
# whatever slice() returned for that input.  Without inputs the result
# of slice() on $_ is returned as a plain list; if $_ is undefined a
# warning is issued and nothing is returned.
sub aslice {
    my $pattern = shift;
    length $pattern or return (0, 0);

    my ($matcher) = (@_ && ref $_[0] eq 'ARRAY')
        ? _complex($pattern, @{ shift @_ })
        : _simple($pattern);

    $matcher->set_greedy; # The first match, thank you.

    return map { [ $matcher->slice($_) ] } @_ if @_;
    return $matcher->slice($_) if defined $_;

    warn "aslice: \$_ is undefined: what are you slicing?\n";
    return;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	394
# _adist(STRING0, STRING1, @modifiers)
#
# Helper behind adist()/adistr().  Runs aslice() for STRING0 against
# STRING1 with 'minimal_distance' prepended to the modifiers and takes
# the third element of the returned slice as the distance.  The distance
# is returned as is when STRING0 is not longer than STRING1, negated
# otherwise.
sub _adist {
    my ($s0, $s1, @modifiers) = @_;

    my ($hit) = aslice($s0, [ 'minimal_distance', @modifiers ], $s1);
    my (undef, undef, $distance) = @{$hit};

    return length($s0) <= length($s1) ? $distance : -$distance;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	403
# adist(STRING0, STRING1 [, \@modifiers] [, more strings...])
#
# Edit distance front end built on _adist().  Either of the first two
# arguments may be an array reference:
#
#   array,  array  - array ref of array refs: for every element of the
#                    second array, the distances of all elements of the
#                    first array to it
#   array,  scalar - array ref: distance of each array element to the
#                    scalar
#   scalar, array  - array ref: distance of the scalar to each element
#   scalar, scalar - list context: one distance per input string
#                    (STRING1 followed by any further arguments);
#                    scalar context: the single distance
#
# If the length of either of the first two arguments is zero, the
# length of the other one is returned immediately.
# NOTE(review): that shortcut runs before the array-ref checks, so an
# empty string paired with an array reference yields the length of the
# stringified reference -- left unchanged here, confirm intent.
sub adist {
    my $a0 = shift;
    my $a1 = shift;
    if (length($a0) == 0) {
        return length($a1);
    }
    if (length($a1) == 0) {
        return length($a0);
    }
    my @m = ref $_[0] eq 'ARRAY' ? @{ shift() } : ();
    if (ref $a0 eq 'ARRAY') {
        if (ref $a1 eq 'ARRAY') {
            # Hand the modifiers on as an array reference: the recursive
            # call only recognises modifiers in that form, so passing the
            # flattened list would silently discard them.
            return [ map { adist($a0, $_, \@m) } @{$a1} ];
        } else {
            return [ map { _adist($_, $a1, @m) } @{$a0} ];
        }
    } elsif (ref $a1 eq 'ARRAY') {
        return [ map { _adist($a0, $_, @m) } @{$a1} ];
    } else {
        if (wantarray) {
            return map { _adist($a0, $_, @m) } ($a1, @_);
        } else {
            return _adist($a0, $a1, @m);
        }
    }
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	430
# adistr(STRING0, STRING1 [, \@modifiers] [, more strings...])
#
# Relative edit distance: the value from _adist() divided by the length
# of the first string of each pair.  Argument shapes mirror adist():
#
#   array,  array  - array ref of array refs: for every element of the
#                    second array, the relative distances of all
#                    elements of the first array to it
#   array,  scalar - array ref, one entry per array element (undef for
#                    an element of zero length)
#   scalar, array  - array ref, one entry per element; an empty array
#                    ref if the scalar has zero length
#   scalar, scalar - list context: one value per input string (undef
#                    for each if STRING0 has zero length); scalar
#                    context: the single value, or undef if STRING0 has
#                    zero length
sub adistr {
    my $a0 = shift;
    my $a1 = shift;
    # Flatten the modifier array reference, as adist() does, so that
    # _adist() receives the modifier strings rather than a reference.
    my @m = ref $_[0] eq 'ARRAY' ? @{ shift() } : ();
    if (ref $a0 eq 'ARRAY') {
        if (ref $a1 eq 'ARRAY') {
            # Recurse per element of the second array; the modifiers go
            # along as an array reference because that is the only form
            # the recursive call recognises.
            return [ map { adistr($a0, $_, \@m) } @{$a1} ];
        } else {
            return [ map { my $l0 = length($_);
                           $l0 ? _adist($_, $a1, @m) / $l0 : undef
                         } @{$a0} ];
        }
    } elsif (ref $a1 eq 'ARRAY') {
        my $l0 = length($a0);
        return [] unless $l0;
        return [ map { _adist($a0, $_, @m) / $l0 } @{$a1} ];
    } else {
        my $l0 = length($a0);
        if (wantarray) {
            return map { $l0 ? _adist($a0, $_, @m) / $l0 : undef } ($a1, @_);
        } else {
            return undef unless $l0;
            return _adist($a0, $a1, @m) / $l0;
        }
    }
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	460
# adistword(STRING0, STRING1)
#
# adist() on the first two arguments with the single modifier
# 'position_range=0'.  Any further arguments are ignored.
sub adistword {
    my ($a0, $a1) = @_;
    return adist($a0, $a1, ['position_range=0']);
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	464
# adistrword(STRING0, STRING1)
#
# adistr() on the first two arguments with the single modifier
# 'position_range=0'.  Any further arguments are ignored.
sub adistrword {
    my ($a0, $a1) = @_;
    return adistr($a0, $a1, ['position_range=0']);
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	468
# arindex(PATTERN [, \@modifiers] [, @inputs])
#
# Approximate counterpart of rindex(), implemented by reversing both the
# pattern and each input and searching forwards.  An empty PATTERN
# yields 0.  The matcher for the reversed pattern comes from _complex()
# when the first remaining argument is an array reference, otherwise
# from _simple().
#
# A hit at position p in the reversed input is reported as
# length(input) - p - length(PATTERN); -1 is passed through unchanged.
#
# With inputs: list context returns one value per input, scalar context
# the value for the first input only.  Without inputs $_ is searched; if
# $_ is undefined a warning is issued and nothing is returned.
sub arindex {
    my $pattern = shift;
    my $pattern_length = length $pattern;
    return 0 unless $pattern_length;

    my $reversed = reverse $pattern;
    my ($matcher) = (@_ && ref $_[0] eq 'ARRAY')
        ? _complex($reversed, @{ shift @_ })
        : _simple($reversed);

    $matcher->set_greedy; # The first match, thank you.

    # Search one input back to front and translate the position found in
    # the reversed text into a position in the text as given.
    my $rindex_in = sub {
        my ($text) = @_;
        my $hit = $matcher->index(scalar reverse $text);
        return $hit == -1 ? $hit : (length($text) - $hit - $pattern_length);
    };

    if (@_) {
        return map { $rindex_in->($_) } @_ if wantarray;
        return $rindex_in->($_[0]);
    }
    return $rindex_in->($_) if defined $_;

    warn "arindex: \$_ is undefined: what are you indexing?\n";
    return;
}
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	498
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	499 1;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	500 __END__
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	501 =pod
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	502
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	503 =head1 NAME
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	504
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	505 String::Approx - Perl extension for approximate matching (fuzzy matching)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	506
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	507 =head1 SYNOPSIS
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	508
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	509 use String::Approx 'amatch';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	510
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	511 print if amatch("foobar");
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	512
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	513 my @matches = amatch("xyzzy", @inputs);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	514
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	515 my @catches = amatch("plugh", ['2'], @inputs);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	516
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	517 =head1 DESCRIPTION
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	518
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	519 String::Approx lets you match and substitute strings approximately.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	520 With this you can emulate errors: typing errorrs, speling errors,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	521 closely related vocabularies (colour color), genetic mutations (GAG
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	522 ACT), abbreviations (McScot, MacScot).
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	523
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	524 NOTE: String::Approx suits the task of B<string matching>, not
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	525 B<string comparison>, and it works for B<strings>, not for B<text>.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	526
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	527 If you want to compare strings for similarity, you probably just want
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	528 the Levenshtein edit distance (explained below), the Text::Levenshtein
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	529 and Text::LevenshteinXS modules in CPAN. See also Text::WagnerFischer
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	530 and Text::PhraseDistance. (There are functions for this in String::Approx,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	531 e.g. adist(), but their results sometimes differ from the bare Levenshtein
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	532 et al.)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	533
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	534 If you want to compare things like text or source code, consisting of
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	535 B<words> or B<tokens> and B<phrases> and B<sentences>, or
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	536 B<expressions> and B<statements>, you should probably use some other
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	537 tool than String::Approx, like for example the standard UNIX diff(1)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	538 tool, or the Algorithm::Diff module from CPAN.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	539
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	540 The measure of B<approximateness> is the I<Levenshtein edit distance>.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	541 It is the total number of "edits": insertions,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	542
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	543 word world
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	544
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	545 deletions,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	546
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	547 monkey money
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	548
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	549 and substitutions
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	550
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	551 sun fun
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	552
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	553 required to transform a string to another string. For example, to
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	554 transform I<"lead"> into I<"gold">, you need three edits:
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	555
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	556 lead gead goad gold
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	557
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	558 The edit distance of "lead" and "gold" is therefore three, or 75%.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	559
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	560 B<String::Approx> uses the Levenshtein edit distance as its measure, but
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	561 String::Approx is not well-suited for comparing strings of different
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	562 length, in other words, if you want a "fuzzy eq", see above.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	563 String::Approx is more like regular expressions or index(), it finds
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	564 substrings that are close matches.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	565
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	566 =head1 MATCH
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	567
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	568 use String::Approx 'amatch';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	569
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	570 $matched = amatch("pattern")
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	571 $matched = amatch("pattern", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	572
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	573 $any_matched = amatch("pattern", @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	574 $any_matched = amatch("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	575
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	576 @match = amatch("pattern")
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	577 @match = amatch("pattern", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	578
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	579 @matches = amatch("pattern", @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	580 @matches = amatch("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	581
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	582 Match B<pattern> approximately. In list context return the matched
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	583 B<@inputs>. If no inputs are given, match against the B<$_>. In scalar
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	584 context return true if I<any> of the inputs match, false if none match.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	585
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	586 Notice that the pattern is a string. Not a regular expression. None
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	587 of the regular expression notations (^, ., *, and so on) work. They
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	588 are characters just like the others. Note-on-note: some limited form
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	589 of I<"regular expressionism"> is planned in future: for example
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	590 character classes ([abc]) and I<any-chars> (.). But that feature will
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	591 be turned on by a special I<modifier> (just a guess: "r"), so there
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	592 should be no backward compatibility problem.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	593
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	594 Notice also that matching is not symmetric. The inputs are matched
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	595 against the pattern, not the other way round. In other words: the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	596 pattern can be a substring, a submatch, of an input element. An input
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	597 element is always a superstring of the pattern.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	598
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	599 =head2 MODIFIERS
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	600
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	601 With the modifiers you can control the amount of approximateness and
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	602 certain other control variables. The modifiers are one or more
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	603 strings, for example B<"i">, within a string optionally separated by
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	604 whitespace. The modifiers are inside an anonymous array: the B<[ ]>
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	605 in the syntax are not notational, they really do mean B<[ ]>, for
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	606 example B<[ "i", "2" ]>. B<["2 i"]> would be identical.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	607
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	608 The implicit default approximateness is 10%, rounded up. In other
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	609 words: every tenth character in the pattern may be an error, an edit.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	610 You can explicitly set the maximum approximateness by supplying a
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	611 modifier like
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	612
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	613 number
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	614 number%
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	615
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	616 Examples: B<"3">, B<"15%">.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	617
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	618 Note that C<0%> is not rounded up, it is equal to C<0>.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	619
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	620 Using a similar syntax you can separately control the maximum number
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	621 of insertions, deletions, and substitutions by prefixing the numbers
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	622 with I, D, or S, like this:
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	623
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	624 Inumber
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	625 Inumber%
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	626 Dnumber
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	627 Dnumber%
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	628 Snumber
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	629 Snumber%
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	630
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	631 Examples: B<"I2">, B<"D20%">, B<"S0">.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	632
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	633 You can ignore case (B<"A"> becomes equal to B<"a"> and vice versa)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	634 by adding the B<"i"> modifier.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	635
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	636 For example
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	637
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	638 [ "i 25%", "S0" ]
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	639
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	640 means I<ignore case>, I<allow every fourth character to be "an edit">,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	641 but allow I<no substitutions>. (See L<NOTES> about disallowing
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	642 substitutions or insertions.)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	643
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	644 NOTE: setting C<I0 D0 S0> is not equivalent to using index().
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	645 If you want to use index(), use index().
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	646
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	647 =head1 SUBSTITUTE
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	648
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	649 use String::Approx 'asubstitute';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	650
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	651 @substituted = asubstitute("pattern", "replacement")
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	652 @substituted = asubstitute("pattern", "replacement", @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	653 @substituted = asubstitute("pattern", "replacement", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	654 @substituted = asubstitute("pattern", "replacement",
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	655 [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	656
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	657 Substitute approximate B<pattern> with B<replacement> and return as a
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	658 list I<copies> of B<@inputs>, the substitutions having been made on the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	659 elements that did match the pattern. If no inputs are given,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	660 substitute in the B<$_>. The replacement can contain magic strings
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	661 B<$&>, B<$`>, B<$'> that stand for the matched string, the string
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	662 before it, and the string after it, respectively. All the other
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	663 arguments are as in C<amatch()>, plus one additional modifier, B<"g">
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	664 which means substitute globally (all the matches in an element and not
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	665 just the first one, as is the default).
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	666
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	667 See L<BAD NEWS> about the unfortunate stinginess of C<asubstitute()>.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	668
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	669 =head1 INDEX
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	670
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	671 use String::Approx 'aindex';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	672
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	673 $index = aindex("pattern")
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	674 @indices = aindex("pattern", @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	675 $index = aindex("pattern", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	676 @indices = aindex("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	677
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	678 Like C<amatch()> but returns the index/indices at which the pattern
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	679 matches approximately. In list context and if C<@inputs> are used,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	680 returns a list of indices, one index for each input element.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	681 If there's no approximate match, C<-1> is returned as the index.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	682
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	683 NOTE: if there is character repetition (e.g. "aa") either in
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	684 the pattern or in the text, the returned index might start
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	685 "too early". This is consistent with the goal of the module
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	686 of matching "as early as possible", just like regular expressions
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	687 (that there might be a "less approximate" match starting later is
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	688 somewhat irrelevant).
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	689
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	690 There's also backwards-scanning C<arindex()>.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	691
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	692 =head1 SLICE
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	693
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	694 use String::Approx 'aslice';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	695
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	696 ($index, $size) = aslice("pattern")
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	697 ([$i0, $s0], ...) = aslice("pattern", @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	698 ($index, $size) = aslice("pattern", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	699 ([$i0, $s0], ...) = aslice("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	700
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	701 Like C<aindex()> but returns also the size (length) of the match.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	702 If the match fails, returns an empty list (when matching against C<$_>)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	703 or an empty anonymous list corresponding to the particular input.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	704
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	705 NOTE: size of the match will very probably be something you did not
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	706 expect (such as longer than the pattern, or a negative number). This
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	707 may or may not be fixed in future releases. Also the beginning of the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	708 match may vary from the expected as with aindex(), see above.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	709
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	710 If the modifier
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	711
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	712 "minimal_distance"
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	713
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	714 is used, the minimal possible edit distance is returned as the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	715 third element:
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	716
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	717 ($index, $size, $distance) = aslice("pattern", [ modifiers ])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	718 ([$i0, $s0, $d0], ...) = aslice("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	719
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	720 =head1 DISTANCE
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	721
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	722 use String::Approx 'adist';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	723
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	724 $dist = adist("pattern", $input);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	725 @dist = adist("pattern", @input);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	726
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	727 Return the I<edit distance> or distances between the pattern and the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	728 input or inputs. Zero edit distance means exact match. (Remember
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	729 that the match can 'float' in the inputs, the match is a substring
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	730 match.) If the pattern is longer than the input or inputs, the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	731 returned distance or distances is or are negative.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	732
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	733 use String::Approx 'adistr';
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	734
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	735 $dist = adistr("pattern", $input);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	736 @dist = adistr("pattern", @inputs);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	737
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	738 Return the B<relative> I<edit distance> or distances between the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	739 pattern and the input or inputs. Zero relative edit distance means
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	740 exact match, one means completely different. (Remember that the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	741 match can 'float' in the inputs, the match is a substring match.) If
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	742 the pattern is longer than the input or inputs, the returned distance
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	743 or distances is or are negative.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	744
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	745 You can use adist() or adistr() to sort the inputs according to their
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	746 approximateness:
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	747
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	748 my %d;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	749 @d{@inputs} = map { abs } adistr("pattern", @inputs);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	750 my @d = sort { $d{$a} <=> $d{$b} } @inputs;
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	751
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	752 Now C<@d> contains the inputs, the most like C<"pattern"> first.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	753
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	754 =head1 CONTROLLING THE CACHE
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	755
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	756 C<String::Approx> maintains a LU (least-used) cache that holds the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	757 'matching engines' for each instance of a I<pattern+modifiers>. The
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	758 cache is intended to help the case where you match a small set of
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	759 patterns against a large set of strings. However, the more engines you
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	760 cache the more you eat memory. If you have a lot of different
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	761 patterns or if you have a lot of memory to burn, you may want to
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	762 control the cache yourself. For example, allowing a larger cache
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	763 consumes more memory but probably runs a little bit faster since the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	764 cache fills (and needs flushing) less often.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	765
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	766 The cache has two parameters: I<max> and I<purge>. The first one
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	767 is the maximum size of the cache and the second one is the cache
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	768 flushing ratio: when the number of cache entries exceeds I<max>,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	769 I<max> times I<purge> cache entries are flushed. The default
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	770 values are 1000 and 0.75, respectively, which means that when
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	771 the 1001st entry would be cached, 750 least used entries will
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	772 be removed from the cache. To access the parameters you can
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	773 use the calls
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	774
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	775 $now_max = String::Approx::cache_max();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	776 String::Approx::cache_max($new_max);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	777
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	778 $now_purge = String::Approx::cache_purge();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	779 String::Approx::cache_purge($new_purge);
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	780
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	781 $limit = String::Approx::cache_n_purge();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	782
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	783 To be honest, there are actually B<two> caches: the first one is used
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	784 for the patterns with no modifiers, the second one for the patterns
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	785 with pattern modifiers. Using the standard parameters you will
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	786 therefore actually cache up to 2000 entries. The above calls control
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	787 both caches for the same price.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	788
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	789 To disable caching completely use
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	790
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	791 String::Approx::cache_disable();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	792
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	793 Note that this doesn't flush any possibly existing cache entries,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	794 to do that use
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	795
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	796 String::Approx::cache_flush_all();
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	797
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	798 =head1 NOTES
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	799
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	800 Because matching is by I<substrings>, not by whole strings, insertions
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	801 and substitutions produce often very similar results: "abcde" matches
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	802 "axbcde" either by insertion B<or> substitution of "x".
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	803
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	804 The maximum edit distance is also the maximum number of edits.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	805 That is, the B<"I2"> in
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	806
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	807 amatch("abcd", ["I2"])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	808
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	809 is useless because the maximum edit distance is (implicitly) 1.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	810 You may have meant to say
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	811
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	812 amatch("abcd", ["2D1S1"])
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	813
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	814 or something like that.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	815
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	816 If you want to simulate transposes
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	817
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	818 feet fete
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	819
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	820 you need to allow at least edit distance of two because in terms of
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	821 our edit primitives a transpose is first one deletion and then one
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	822 insertion.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	823
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	824 =head2 TEXT POSITION
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	825
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	826 The starting and ending positions of matching, substituting, indexing, or
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	827 slicing can be changed from the beginning and end of the input(s) to
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	828 some other positions by using either or both of the modifiers
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	829
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	830 "initial_position=24"
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	831 "final_position=42"
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	832
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	833 or both of the modifiers
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	834
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	835 "initial_position=24"
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	836 "position_range=10"
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	837
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	838 By setting the B<"position_range"> to be zero you can limit
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	839 (anchor) the operation to happen only once (if a match is possible)
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	840 at the position.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	841
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	842 =head1 VERSION
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	843
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	844 Major release 3.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	845
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	846 =head1 CHANGES FROM VERSION 2
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	847
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	848 =head2 GOOD NEWS
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	849
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	850 =over 4
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	851
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	852 =item The version 3 is 2-3 times faster than version 2
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	853
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	854 =item No pattern length limitation
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	855
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	856 The algorithm is independent of the pattern length: its time
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	857 complexity is I<O(kn)>, where I<k> is the number of edits and I<n> the
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	858 length of the text (input). The preprocessing of the pattern will of
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	859 course take some I<O(m)> (I<m> being the pattern length) time, but
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	860 C<amatch()> and C<asubstitute()> cache the result of this
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	861 preprocessing so that it is done only once per pattern.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	862
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	863 =back
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	864
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	865 =head2 BAD NEWS
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	866
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	867 =over 4
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	868
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	869 =item You do need a C compiler to install the module
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	870
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	871 Perl's regular expressions are no longer used; instead a faster and more
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	872 scalable algorithm written in C is used.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	873
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	874 =item C<asubstitute()> is now always stingy
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	875
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	876 The string matched and substituted is now always stingy, as short
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	877 as possible. It used to be as long as possible. This is an unfortunate
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	878 change stemming from switching the matching algorithm. Example: with
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	879 edit distance of two and substituting for B<"word"> from B<"cork"> and
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	880 B<"wool"> previously did match B<"cork"> and B<"wool">. Now it does
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	881 match B<"or"> and B<"wo">. As little as possible, or, in other words,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	882 with as much approximateness, as many edits, as possible. Because
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	883 there is no I<need> to match the B<"c"> of B<"cork">, it is not matched.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	884
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	885 =item no more C<aregex()> because regular expressions are no more used
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	886
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	887 =item no more C<compat1> for String::Approx version 1 compatibility
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	888
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	889 =back
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	890
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	891 =head1 ACKNOWLEDGEMENTS
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	892
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	893 The following people have provided valuable test cases, documentation
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	894 clarifications, and other feedback:
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	895
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	896 Jared August, Arthur Bergman, Anirvan Chatterjee, Steve A. Chervitz,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	897 Aldo Calpini, David Curiel, Teun van den Dool, Alberto Fontaneda,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	898 Rob Fugina, Dmitrij Frishman, Lars Gregersen, Kevin Greiner,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	899 B. Elijah Griffin, Mike Hanafey, Mitch Helle, Ricky Houghton,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	900 'idallen', Helmut Jarausch, Damian Keefe, Ben Kennedy, Craig Kelley,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	901 Franz Kirsch, Dag Kristian, Mark Land, J. D. Laub, John P. Linderman,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	902 Tim Maher, Juha Muilu, Sergey Novoselov, Andy Oram, Ji Y Park,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	903 Eric Promislow, Nikolaus Rath, Stefan Ram, Slaven Rezic,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	904 Dag Kristian Rognlien, Stewart Russell, Slaven Rezic, Chris Rosin,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	905 Pasha Sadri, Ilya Sandler, Bob J.A. Schijvenaars, Ross Smith,
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	906 Frank Tobin, Greg Ward, Rich Williams, Rick Wise.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	907
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	908 The matching algorithm was developed by Udi Manber, Sun Wu, and Burra
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	909 Gopal in the Department of Computer Science, University of Arizona.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	910
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	911 =head1 AUTHOR
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	912
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	913 Jarkko Hietaniemi <jhi@iki.fi>
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	914
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	915 =head1 COPYRIGHT AND LICENSE
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	916
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	917 Copyright 2001-2013 by Jarkko Hietaniemi
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	918
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	919 This library is free software; you can redistribute it and/or modify
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	920 under either the terms of the Artistic License 2.0, or the GNU Library
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	921 General Public License, Version 2. See the files Artistic and LGPL
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	922 for more details.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	923
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	924 Furthermore: no warranties or obligations of any kind are given, and
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	925 the separate file F<COPYRIGHT> must be included intact in all copies
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	926 and derived materials.
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	927
e3609c8714fb Uploaded plus91-technologies-pvt-ltd parents: diff changeset	928 =cut

Mercurial > repos > plus91-technologies-pvt-ltd > softsearch

annotate 2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm @ 18:1163c16cb3c0 draft