0
|
1 package Statistics::Distributions;
|
|
2
|
|
3 use strict;
|
|
4 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
|
|
5 use constant PI => 3.1415926536;
|
|
6 use constant SIGNIFICANT => 5; # number of significant digits to be returned
|
|
7
|
|
8 require Exporter;
|
|
9
|
|
10 @ISA = qw(Exporter AutoLoader);
|
|
11 # Items to export into callers namespace by default. Note: do not export
|
|
12 # names by default without a very good reason. Use EXPORT_OK instead.
|
|
13 # Do not simply export all your public functions/methods/constants.
|
|
14 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
|
|
15 $VERSION = '1.02';
|
|
16
|
|
17 # Preloaded methods go here.
|
|
18
|
|
19 sub chisqrdistr { # Percentage points X^2(x^2,n)
|
|
20 my ($n, $p) = @_;
|
|
21 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
|
|
22 die "Invalid n: $n\n"; # degree of freedom
|
|
23 }
|
|
24 if ($p <= 0 || $p > 1) {
|
|
25 die "Invalid p: $p\n";
|
|
26 }
|
|
27 return precision_string(_subchisqr($n, $p));
|
|
28 }
|
|
29
|
|
30 sub udistr { # Percentage points N(0,1^2)
|
|
31 my ($p) = (@_);
|
|
32 if ($p > 1 || $p <= 0) {
|
|
33 die "Invalid p: $p\n";
|
|
34 }
|
|
35 return precision_string(_subu($p));
|
|
36 }
|
|
37
|
|
38 sub tdistr { # Percentage points t(x,n)
|
|
39 my ($n, $p) = @_;
|
|
40 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
|
|
41 die "Invalid n: $n\n";
|
|
42 }
|
|
43 if ($p <= 0 || $p >= 1) {
|
|
44 die "Invalid p: $p\n";
|
|
45 }
|
|
46 return precision_string(_subt($n, $p));
|
|
47 }
|
|
48
|
|
49 sub fdistr { # Percentage points F(x,n1,n2)
|
|
50 my ($n, $m, $p) = @_;
|
|
51 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
|
|
52 die "Invalid n: $n\n"; # first degree of freedom
|
|
53 }
|
|
54 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
|
|
55 die "Invalid m: $m\n"; # second degree of freedom
|
|
56 }
|
|
57 if (($p<=0) || ($p>1)) {
|
|
58 die "Invalid p: $p\n";
|
|
59 }
|
|
60 return precision_string(_subf($n, $m, $p));
|
|
61 }
|
|
62
|
|
63 sub uprob { # Upper probability N(0,1^2)
|
|
64 my ($x) = @_;
|
|
65 return precision_string(_subuprob($x));
|
|
66 }
|
|
67
|
|
68 sub chisqrprob { # Upper probability X^2(x^2,n)
|
|
69 my ($n,$x) = @_;
|
|
70 if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) {
|
|
71 die "Invalid n: $n\n"; # degree of freedom
|
|
72 }
|
|
73 return precision_string(_subchisqrprob($n, $x));
|
|
74 }
|
|
75
|
|
76 sub tprob { # Upper probability t(x,n)
|
|
77 my ($n, $x) = @_;
|
|
78 if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) {
|
|
79 die "Invalid n: $n\n"; # degree of freedom
|
|
80 }
|
|
81 return precision_string(_subtprob($n, $x));
|
|
82 }
|
|
83
|
|
84 sub fprob { # Upper probability F(x,n1,n2)
|
|
85 my ($n, $m, $x) = @_;
|
|
86 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
|
|
87 die "Invalid n: $n\n"; # first degree of freedom
|
|
88 }
|
|
89 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
|
|
90 die "Invalid m: $m\n"; # second degree of freedom
|
|
91 }
|
|
92 return precision_string(_subfprob($n, $m, $x));
|
|
93 }
|
|
94
|
|
95
|
|
96 sub _subfprob {
|
|
97 my ($n, $m, $x) = @_;
|
|
98 my $p;
|
|
99
|
|
100 if ($x<=0) {
|
|
101 $p=1;
|
|
102 } elsif ($m % 2 == 0) {
|
|
103 my $z = $m / ($m + $n * $x);
|
|
104 my $a = 1;
|
|
105 for (my $i = $m - 2; $i >= 2; $i -= 2) {
|
|
106 $a = 1 + ($n + $i - 2) / $i * $z * $a;
|
|
107 }
|
|
108 $p = 1 - ((1 - $z) ** ($n / 2) * $a);
|
|
109 } elsif ($n % 2 == 0) {
|
|
110 my $z = $n * $x / ($m + $n * $x);
|
|
111 my $a = 1;
|
|
112 for (my $i = $n - 2; $i >= 2; $i -= 2) {
|
|
113 $a = 1 + ($m + $i - 2) / $i * $z * $a;
|
|
114 }
|
|
115 $p = (1 - $z) ** ($m / 2) * $a;
|
|
116 } else {
|
|
117 my $y = atan2(sqrt($n * $x / $m), 1);
|
|
118 my $z = sin($y) ** 2;
|
|
119 my $a = ($n == 1) ? 0 : 1;
|
|
120 for (my $i = $n - 2; $i >= 3; $i -= 2) {
|
|
121 $a = 1 + ($m + $i - 2) / $i * $z * $a;
|
|
122 }
|
|
123 my $b = PI;
|
|
124 for (my $i = 2; $i <= $m - 1; $i += 2) {
|
|
125 $b *= ($i - 1) / $i;
|
|
126 }
|
|
127 my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a;
|
|
128
|
|
129 $z = cos($y) ** 2;
|
|
130 $a = ($m == 1) ? 0 : 1;
|
|
131 for (my $i = $m-2; $i >= 3; $i -= 2) {
|
|
132 $a = 1 + ($i - 1) / $i * $z * $a;
|
|
133 }
|
|
134 $p = max(0, $p1 + 1 - 2 * $y / PI
|
|
135 - 2 / PI * sin($y) * cos($y) * $a);
|
|
136 }
|
|
137 return $p;
|
|
138 }
|
|
139
|
|
140
|
|
141 sub _subchisqrprob {
|
|
142 my ($n,$x) = @_;
|
|
143 my $p;
|
|
144
|
|
145 if ($x <= 0) {
|
|
146 $p = 1;
|
|
147 } elsif ($n > 100) {
|
|
148 $p = _subuprob((($x / $n) ** (1/3)
|
|
149 - (1 - 2/9/$n)) / sqrt(2/9/$n));
|
|
150 } elsif ($x > 400) {
|
|
151 $p = 0;
|
|
152 } else {
|
|
153 my ($a, $i, $i1);
|
|
154 if (($n % 2) != 0) {
|
|
155 $p = 2 * _subuprob(sqrt($x));
|
|
156 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
|
|
157 $i1 = 1;
|
|
158 } else {
|
|
159 $p = $a = exp(-$x/2);
|
|
160 $i1 = 2;
|
|
161 }
|
|
162
|
|
163 for ($i = $i1; $i <= ($n-2); $i += 2) {
|
|
164 $a *= $x / $i;
|
|
165 $p += $a;
|
|
166 }
|
|
167 }
|
|
168 return $p;
|
|
169 }
|
|
170
|
|
171 sub _subu {
|
|
172 my ($p) = @_;
|
|
173 my $y = -log(4 * $p * (1 - $p));
|
|
174 my $x = sqrt(
|
|
175 $y * (1.570796288
|
|
176 + $y * (.03706987906
|
|
177 + $y * (-.8364353589E-3
|
|
178 + $y *(-.2250947176E-3
|
|
179 + $y * (.6841218299E-5
|
|
180 + $y * (0.5824238515E-5
|
|
181 + $y * (-.104527497E-5
|
|
182 + $y * (.8360937017E-7
|
|
183 + $y * (-.3231081277E-8
|
|
184 + $y * (.3657763036E-10
|
|
185 + $y *.6936233982E-12)))))))))));
|
|
186 $x = -$x if ($p>.5);
|
|
187 return $x;
|
|
188 }
|
|
189
|
|
190 sub _subuprob {
|
|
191 my ($x) = @_;
|
|
192 my $p = 0; # if ($absx > 100)
|
|
193 my $absx = abs($x);
|
|
194
|
|
195 if ($absx < 1.9) {
|
|
196 $p = (1 +
|
|
197 $absx * (.049867347
|
|
198 + $absx * (.0211410061
|
|
199 + $absx * (.0032776263
|
|
200 + $absx * (.0000380036
|
|
201 + $absx * (.0000488906
|
|
202 + $absx * .000005383)))))) ** -16/2;
|
|
203 } elsif ($absx <= 100) {
|
|
204 for (my $i = 18; $i >= 1; $i--) {
|
|
205 $p = $i / ($absx + $p);
|
|
206 }
|
|
207 $p = exp(-.5 * $absx * $absx)
|
|
208 / sqrt(2 * PI) / ($absx + $p);
|
|
209 }
|
|
210
|
|
211 $p = 1 - $p if ($x<0);
|
|
212 return $p;
|
|
213 }
|
|
214
|
|
215
|
|
216 sub _subt {
|
|
217 my ($n, $p) = @_;
|
|
218
|
|
219 if ($p >= 1 || $p <= 0) {
|
|
220 die "Invalid p: $p\n";
|
|
221 }
|
|
222
|
|
223 if ($p == 0.5) {
|
|
224 return 0;
|
|
225 } elsif ($p < 0.5) {
|
|
226 return - _subt($n, 1 - $p);
|
|
227 }
|
|
228
|
|
229 my $u = _subu($p);
|
|
230 my $u2 = $u ** 2;
|
|
231
|
|
232 my $a = ($u2 + 1) / 4;
|
|
233 my $b = ((5 * $u2 + 16) * $u2 + 3) / 96;
|
|
234 my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384;
|
|
235 my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945)
|
|
236 / 92160;
|
|
237 my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2
|
|
238 + 17955) / 368640;
|
|
239
|
|
240 my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n);
|
|
241
|
|
242 if ($n <= log10($p) ** 2 + 3) {
|
|
243 my $round;
|
|
244 do {
|
|
245 my $p1 = _subtprob($n, $x);
|
|
246 my $n1 = $n + 1;
|
|
247 my $delta = ($p1 - $p)
|
|
248 / exp(($n1 * log($n1 / ($n + $x * $x))
|
|
249 + log($n/$n1/2/PI) - 1
|
|
250 + (1/$n1 - 1/$n) / 6) / 2);
|
|
251 $x += $delta;
|
|
252 $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta);
|
|
253 } while (($x) && ($round != 0));
|
|
254 }
|
|
255 return $x;
|
|
256 }
|
|
257
|
|
258 sub _subtprob {
|
|
259 my ($n, $x) = @_;
|
|
260
|
|
261 my ($a,$b);
|
|
262 my $w = atan2($x / sqrt($n), 1);
|
|
263 my $z = cos($w) ** 2;
|
|
264 my $y = 1;
|
|
265
|
|
266 for (my $i = $n-2; $i >= 2; $i -= 2) {
|
|
267 $y = 1 + ($i-1) / $i * $z * $y;
|
|
268 }
|
|
269
|
|
270 if ($n % 2 == 0) {
|
|
271 $a = sin($w)/2;
|
|
272 $b = .5;
|
|
273 } else {
|
|
274 $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI;
|
|
275 $b= .5 + $w/PI;
|
|
276 }
|
|
277 return max(0, 1 - $b - $a * $y);
|
|
278 }
|
|
279
|
|
280 sub _subf {
|
|
281 my ($n, $m, $p) = @_;
|
|
282 my $x;
|
|
283
|
|
284 if ($p >= 1 || $p <= 0) {
|
|
285 die "Invalid p: $p\n";
|
|
286 }
|
|
287
|
|
288 if ($p == 1) {
|
|
289 $x = 0;
|
|
290 } elsif ($m == 1) {
|
|
291 $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2);
|
|
292 } elsif ($n == 1) {
|
|
293 $x = _subt($m, $p/2) ** 2;
|
|
294 } elsif ($m == 2) {
|
|
295 my $u = _subchisqr($m, 1 - $p);
|
|
296 my $a = $m - 2;
|
|
297 $x = 1 / ($u / $m * (1 +
|
|
298 (($u - $a) / 2 +
|
|
299 (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 +
|
|
300 (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u
|
|
301 - $a * $a * (9 * $m - 6)
|
|
302 )/48/$n
|
|
303 )/$n
|
|
304 )/$n));
|
|
305 } elsif ($n > $m) {
|
|
306 $x = 1 / _subf2($m, $n, 1 - $p)
|
|
307 } else {
|
|
308 $x = _subf2($n, $m, $p)
|
|
309 }
|
|
310 return $x;
|
|
311 }
|
|
312
|
|
313 sub _subf2 {
|
|
314 my ($n, $m, $p) = @_;
|
|
315 my $u = _subchisqr($n, $p);
|
|
316 my $n2 = $n - 2;
|
|
317 my $x = $u / $n *
|
|
318 (1 +
|
|
319 (($u - $n2) / 2 +
|
|
320 (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 +
|
|
321 (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u
|
|
322 - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m);
|
|
323 my $delta;
|
|
324 do {
|
|
325 my $z = exp(
|
|
326 (($n+$m) * log(($n+$m) / ($n * $x + $m))
|
|
327 + ($n - 2) * log($x)
|
|
328 + log($n * $m / ($n+$m))
|
|
329 - log(4 * PI)
|
|
330 - (1/$n + 1/$m - 1/($n+$m))/6
|
|
331 )/2);
|
|
332 $delta = (_subfprob($n, $m, $x) - $p) / $z;
|
|
333 $x += $delta;
|
|
334 } while (abs($delta)>3e-4);
|
|
335 return $x;
|
|
336 }
|
|
337
|
|
338 sub _subchisqr {
|
|
339 my ($n, $p) = @_;
|
|
340 my $x;
|
|
341
|
|
342 if (($p > 1) || ($p <= 0)) {
|
|
343 die "Invalid p: $p\n";
|
|
344 } elsif ($p == 1){
|
|
345 $x = 0;
|
|
346 } elsif ($n == 1) {
|
|
347 $x = _subu($p / 2) ** 2;
|
|
348 } elsif ($n == 2) {
|
|
349 $x = -2 * log($p);
|
|
350 } else {
|
|
351 my $u = _subu($p);
|
|
352 my $u2 = $u * $u;
|
|
353
|
|
354 $x = max(0, $n + sqrt(2 * $n) * $u
|
|
355 + 2/3 * ($u2 - 1)
|
|
356 + $u * ($u2 - 7) / 9 / sqrt(2 * $n)
|
|
357 - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16));
|
|
358
|
|
359 if ($n <= 100) {
|
|
360 my ($x0, $p1, $z);
|
|
361 do {
|
|
362 $x0 = $x;
|
|
363 if ($x < 0) {
|
|
364 $p1 = 1;
|
|
365 } elsif ($n>100) {
|
|
366 $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n))
|
|
367 / sqrt(2/9/$n));
|
|
368 } elsif ($x>400) {
|
|
369 $p1 = 0;
|
|
370 } else {
|
|
371 my ($i0, $a);
|
|
372 if (($n % 2) != 0) {
|
|
373 $p1 = 2 * _subuprob(sqrt($x));
|
|
374 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
|
|
375 $i0 = 1;
|
|
376 } else {
|
|
377 $p1 = $a = exp(-$x/2);
|
|
378 $i0 = 2;
|
|
379 }
|
|
380
|
|
381 for (my $i = $i0; $i <= $n-2; $i += 2) {
|
|
382 $a *= $x / $i;
|
|
383 $p1 += $a;
|
|
384 }
|
|
385 }
|
|
386 $z = exp((($n-1) * log($x/$n) - log(4*PI*$x)
|
|
387 + $n - $x - 1/$n/6) / 2);
|
|
388 $x += ($p1 - $p) / $z;
|
|
389 $x = sprintf("%.5f", $x);
|
|
390 } while (($n < 31) && (abs($x0 - $x) > 1e-4));
|
|
391 }
|
|
392 }
|
|
393 return $x;
|
|
394 }
|
|
395
|
|
396 sub log10 {
|
|
397 my $n = shift;
|
|
398 return log($n) / log(10);
|
|
399 }
|
|
400
|
|
401 sub max {
|
|
402 my $max = shift;
|
|
403 my $next;
|
|
404 while (@_) {
|
|
405 $next = shift;
|
|
406 $max = $next if ($next > $max);
|
|
407 }
|
|
408 return $max;
|
|
409 }
|
|
410
|
|
411 sub min {
|
|
412 my $min = shift;
|
|
413 my $next;
|
|
414 while (@_) {
|
|
415 $next = shift;
|
|
416 $min = $next if ($next < $min);
|
|
417 }
|
|
418 return $min;
|
|
419 }
|
|
420
|
|
421 sub precision {
|
|
422 my ($x) = @_;
|
|
423 return abs int(log10(abs $x) - SIGNIFICANT);
|
|
424 }
|
|
425
|
|
426 sub precision_string {
|
|
427 my ($x) = @_;
|
|
428 if ($x) {
|
|
429 return sprintf "%." . precision($x) . "f", $x;
|
|
430 } else {
|
|
431 return "0";
|
|
432 }
|
|
433 }
|
|
434
|
|
435
|
|
436 # Autoload methods go after =cut, and are processed by the autosplit program.
|
|
437
|
|
438 1;
|
|
439 __END__
|
|
440 # Below is the stub of documentation for your module. You better edit it!
|
|
441
|
|
442 =head1 NAME
|
|
443
|
|
444 Statistics::Distributions - Perl module for calculating critical values and upper probabilities of common statistical distributions
|
|
445
|
|
446 =head1 SYNOPSIS
|
|
447
|
|
448 use Statistics::Distributions;
|
|
449
|
|
450 $chis=Statistics::Distributions::chisqrdistr (2,.05);
|
|
451 print "Chi-squared-crit (2 degrees of freedom, 95th percentile "
|
|
452 ."= 0.05 level) = $chis\n";
|
|
453
|
|
454 $u=Statistics::Distributions::udistr (.05);
|
|
455 print "u-crit (95th percentile = 0.05 level) = $u\n";
|
|
456
|
|
457 $t=Statistics::Distributions::tdistr (1,.005);
|
|
458 print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) "
|
|
459 ."= $t\n";
|
|
460
|
|
461 $f=Statistics::Distributions::fdistr (1,3,.01);
|
|
462 print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom "
|
|
463 ."in denominator, 99th percentile = 0.01 level) = $f\n";
|
|
464
|
|
465 $uprob=Statistics::Distributions::uprob (-0.85);
|
|
466 print "upper probability of the u distribution (u = -0.85): Q(u) "
|
|
467 ."= 1-G(u) = $uprob\n";
|
|
468
|
|
469 $chisprob=Statistics::Distributions::chisqrprob (3,6.25);
|
|
470 print "upper probability of the chi-square distribution (3 degrees "
|
|
471 ."of freedom, chi-squared = 6.25): Q = 1-G = $chisprob\n";
|
|
472
|
|
473 $tprob=Statistics::Distributions::tprob (3,6.251);
|
|
474 print "upper probability of the t distribution (3 degrees of "
|
|
475 ."freedom, t = 6.251): Q = 1-G = $tprob\n";
|
|
476
|
|
477 $fprob=Statistics::Distributions::fprob (3,5,.625);
|
|
478 print "upper probability of the F distribution (3 degrees of freedom "
|
|
479 ."in numerator, 5 degrees of freedom in denominator, F = 6.25): "
|
|
480 ."Q = 1-G = $fprob\n";
|
|
481
|
|
482 =head1 DESCRIPTION
|
|
483
|
|
484 This Perl module calculates percentage points (5 significant digits) of the u (standard normal) distribution, the student's t distribution, the chi-square distribution and the F distribution. It can also calculate the upper probability (5 significant digits) of the u (standard normal), the chi-square, the t and the F distribution.
|
|
485 These critical values are needed to perform statistical tests, like the u test, the t test, the F test and the chi-squared test, and to calculate confidence intervals.
|
|
486
|
|
487 If you are interested in more precise algorithms you could look at:
|
|
488 StatLib: http://lib.stat.cmu.edu/apstat/ ;
|
|
489 Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985)
|
|
490
|
|
491 =head1 BUGS
|
|
492
|
|
493 This final version 1.02 has been released after more than one year without a bug report on the previous version 0.07.
|
|
494 Nevertheless, if you find any bugs or oddities, please do inform the author.
|
|
495
|
|
496 =head1 INSTALLATION
|
|
497
|
|
498 See perlmodinstall for information and options on installing Perl modules.
|
|
499
|
|
500 =head1 AVAILABILITY
|
|
501
|
|
502 The latest version of this module is available from the Distribution Perl Archive Network (CPAN). Please visit http://www.cpan.org/ to find a CPAN site near you or see http://www.cpan.org/authors/id/M/MI/MIKEK/ .
|
|
503
|
|
504 =head1 AUTHOR
|
|
505
|
|
506 Michael Kospach <mike.perl@gmx.at>
|
|
507
|
|
508 Nice formating, simplification and bug repair by Matthias Trautner Kromann <mtk@id.cbs.dk>
|
|
509
|
|
510 =head1 COPYRIGHT
|
|
511
|
|
512 Copyright 2003 Michael Kospach. All rights reserved.
|
|
513
|
|
514 This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
|
515
|
|
516 =head1 SEE ALSO
|
|
517
|
|
518 Statistics::ChiSquare, Statistics::Table::t, Statistics::Table::F, perl(1).
|
|
519
|
|
520 =cut
|
|
521
|
|
522
|
|
523
|
|
524
|
|
525
|
|
526
|
|
527
|
|
528
|
|
529
|