comparison PGAP-1.2.1/Statistics/Distributions.pm @ 0:83e62a1aeeeb draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 13:51:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:83e62a1aeeeb
1 package Statistics::Distributions;
2
3 use strict;
4 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
5 use constant PI => 3.1415926536;
6 use constant SIGNIFICANT => 5; # number of significant digits to be returned
7
8 require Exporter;
9
10 @ISA = qw(Exporter AutoLoader);
11 # Items to export into callers namespace by default. Note: do not export
12 # names by default without a very good reason. Use EXPORT_OK instead.
13 # Do not simply export all your public functions/methods/constants.
14 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
15 $VERSION = '1.02';
16
17 # Preloaded methods go here.
18
19 sub chisqrdistr { # Percentage points X^2(x^2,n)
20 my ($n, $p) = @_;
21 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
22 die "Invalid n: $n\n"; # degree of freedom
23 }
24 if ($p <= 0 || $p > 1) {
25 die "Invalid p: $p\n";
26 }
27 return precision_string(_subchisqr($n, $p));
28 }
29
30 sub udistr { # Percentage points N(0,1^2)
31 my ($p) = (@_);
32 if ($p > 1 || $p <= 0) {
33 die "Invalid p: $p\n";
34 }
35 return precision_string(_subu($p));
36 }
37
38 sub tdistr { # Percentage points t(x,n)
39 my ($n, $p) = @_;
40 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
41 die "Invalid n: $n\n";
42 }
43 if ($p <= 0 || $p >= 1) {
44 die "Invalid p: $p\n";
45 }
46 return precision_string(_subt($n, $p));
47 }
48
49 sub fdistr { # Percentage points F(x,n1,n2)
50 my ($n, $m, $p) = @_;
51 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
52 die "Invalid n: $n\n"; # first degree of freedom
53 }
54 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
55 die "Invalid m: $m\n"; # second degree of freedom
56 }
57 if (($p<=0) || ($p>1)) {
58 die "Invalid p: $p\n";
59 }
60 return precision_string(_subf($n, $m, $p));
61 }
62
63 sub uprob { # Upper probability N(0,1^2)
64 my ($x) = @_;
65 return precision_string(_subuprob($x));
66 }
67
68 sub chisqrprob { # Upper probability X^2(x^2,n)
69 my ($n,$x) = @_;
70 if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) {
71 die "Invalid n: $n\n"; # degree of freedom
72 }
73 return precision_string(_subchisqrprob($n, $x));
74 }
75
76 sub tprob { # Upper probability t(x,n)
77 my ($n, $x) = @_;
78 if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) {
79 die "Invalid n: $n\n"; # degree of freedom
80 }
81 return precision_string(_subtprob($n, $x));
82 }
83
84 sub fprob { # Upper probability F(x,n1,n2)
85 my ($n, $m, $x) = @_;
86 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
87 die "Invalid n: $n\n"; # first degree of freedom
88 }
89 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
90 die "Invalid m: $m\n"; # second degree of freedom
91 }
92 return precision_string(_subfprob($n, $m, $x));
93 }
94
95
96 sub _subfprob {
97 my ($n, $m, $x) = @_;
98 my $p;
99
100 if ($x<=0) {
101 $p=1;
102 } elsif ($m % 2 == 0) {
103 my $z = $m / ($m + $n * $x);
104 my $a = 1;
105 for (my $i = $m - 2; $i >= 2; $i -= 2) {
106 $a = 1 + ($n + $i - 2) / $i * $z * $a;
107 }
108 $p = 1 - ((1 - $z) ** ($n / 2) * $a);
109 } elsif ($n % 2 == 0) {
110 my $z = $n * $x / ($m + $n * $x);
111 my $a = 1;
112 for (my $i = $n - 2; $i >= 2; $i -= 2) {
113 $a = 1 + ($m + $i - 2) / $i * $z * $a;
114 }
115 $p = (1 - $z) ** ($m / 2) * $a;
116 } else {
117 my $y = atan2(sqrt($n * $x / $m), 1);
118 my $z = sin($y) ** 2;
119 my $a = ($n == 1) ? 0 : 1;
120 for (my $i = $n - 2; $i >= 3; $i -= 2) {
121 $a = 1 + ($m + $i - 2) / $i * $z * $a;
122 }
123 my $b = PI;
124 for (my $i = 2; $i <= $m - 1; $i += 2) {
125 $b *= ($i - 1) / $i;
126 }
127 my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a;
128
129 $z = cos($y) ** 2;
130 $a = ($m == 1) ? 0 : 1;
131 for (my $i = $m-2; $i >= 3; $i -= 2) {
132 $a = 1 + ($i - 1) / $i * $z * $a;
133 }
134 $p = max(0, $p1 + 1 - 2 * $y / PI
135 - 2 / PI * sin($y) * cos($y) * $a);
136 }
137 return $p;
138 }
139
140
141 sub _subchisqrprob {
142 my ($n,$x) = @_;
143 my $p;
144
145 if ($x <= 0) {
146 $p = 1;
147 } elsif ($n > 100) {
148 $p = _subuprob((($x / $n) ** (1/3)
149 - (1 - 2/9/$n)) / sqrt(2/9/$n));
150 } elsif ($x > 400) {
151 $p = 0;
152 } else {
153 my ($a, $i, $i1);
154 if (($n % 2) != 0) {
155 $p = 2 * _subuprob(sqrt($x));
156 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
157 $i1 = 1;
158 } else {
159 $p = $a = exp(-$x/2);
160 $i1 = 2;
161 }
162
163 for ($i = $i1; $i <= ($n-2); $i += 2) {
164 $a *= $x / $i;
165 $p += $a;
166 }
167 }
168 return $p;
169 }
170
171 sub _subu {
172 my ($p) = @_;
173 my $y = -log(4 * $p * (1 - $p));
174 my $x = sqrt(
175 $y * (1.570796288
176 + $y * (.03706987906
177 + $y * (-.8364353589E-3
178 + $y *(-.2250947176E-3
179 + $y * (.6841218299E-5
180 + $y * (0.5824238515E-5
181 + $y * (-.104527497E-5
182 + $y * (.8360937017E-7
183 + $y * (-.3231081277E-8
184 + $y * (.3657763036E-10
185 + $y *.6936233982E-12)))))))))));
186 $x = -$x if ($p>.5);
187 return $x;
188 }
189
190 sub _subuprob {
191 my ($x) = @_;
192 my $p = 0; # if ($absx > 100)
193 my $absx = abs($x);
194
195 if ($absx < 1.9) {
196 $p = (1 +
197 $absx * (.049867347
198 + $absx * (.0211410061
199 + $absx * (.0032776263
200 + $absx * (.0000380036
201 + $absx * (.0000488906
202 + $absx * .000005383)))))) ** -16/2;
203 } elsif ($absx <= 100) {
204 for (my $i = 18; $i >= 1; $i--) {
205 $p = $i / ($absx + $p);
206 }
207 $p = exp(-.5 * $absx * $absx)
208 / sqrt(2 * PI) / ($absx + $p);
209 }
210
211 $p = 1 - $p if ($x<0);
212 return $p;
213 }
214
215
216 sub _subt {
217 my ($n, $p) = @_;
218
219 if ($p >= 1 || $p <= 0) {
220 die "Invalid p: $p\n";
221 }
222
223 if ($p == 0.5) {
224 return 0;
225 } elsif ($p < 0.5) {
226 return - _subt($n, 1 - $p);
227 }
228
229 my $u = _subu($p);
230 my $u2 = $u ** 2;
231
232 my $a = ($u2 + 1) / 4;
233 my $b = ((5 * $u2 + 16) * $u2 + 3) / 96;
234 my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384;
235 my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945)
236 / 92160;
237 my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2
238 + 17955) / 368640;
239
240 my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n);
241
242 if ($n <= log10($p) ** 2 + 3) {
243 my $round;
244 do {
245 my $p1 = _subtprob($n, $x);
246 my $n1 = $n + 1;
247 my $delta = ($p1 - $p)
248 / exp(($n1 * log($n1 / ($n + $x * $x))
249 + log($n/$n1/2/PI) - 1
250 + (1/$n1 - 1/$n) / 6) / 2);
251 $x += $delta;
252 $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta);
253 } while (($x) && ($round != 0));
254 }
255 return $x;
256 }
257
258 sub _subtprob {
259 my ($n, $x) = @_;
260
261 my ($a,$b);
262 my $w = atan2($x / sqrt($n), 1);
263 my $z = cos($w) ** 2;
264 my $y = 1;
265
266 for (my $i = $n-2; $i >= 2; $i -= 2) {
267 $y = 1 + ($i-1) / $i * $z * $y;
268 }
269
270 if ($n % 2 == 0) {
271 $a = sin($w)/2;
272 $b = .5;
273 } else {
274 $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI;
275 $b= .5 + $w/PI;
276 }
277 return max(0, 1 - $b - $a * $y);
278 }
279
280 sub _subf {
281 my ($n, $m, $p) = @_;
282 my $x;
283
284 if ($p >= 1 || $p <= 0) {
285 die "Invalid p: $p\n";
286 }
287
288 if ($p == 1) {
289 $x = 0;
290 } elsif ($m == 1) {
291 $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2);
292 } elsif ($n == 1) {
293 $x = _subt($m, $p/2) ** 2;
294 } elsif ($m == 2) {
295 my $u = _subchisqr($m, 1 - $p);
296 my $a = $m - 2;
297 $x = 1 / ($u / $m * (1 +
298 (($u - $a) / 2 +
299 (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 +
300 (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u
301 - $a * $a * (9 * $m - 6)
302 )/48/$n
303 )/$n
304 )/$n));
305 } elsif ($n > $m) {
306 $x = 1 / _subf2($m, $n, 1 - $p)
307 } else {
308 $x = _subf2($n, $m, $p)
309 }
310 return $x;
311 }
312
313 sub _subf2 {
314 my ($n, $m, $p) = @_;
315 my $u = _subchisqr($n, $p);
316 my $n2 = $n - 2;
317 my $x = $u / $n *
318 (1 +
319 (($u - $n2) / 2 +
320 (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 +
321 (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u
322 - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m);
323 my $delta;
324 do {
325 my $z = exp(
326 (($n+$m) * log(($n+$m) / ($n * $x + $m))
327 + ($n - 2) * log($x)
328 + log($n * $m / ($n+$m))
329 - log(4 * PI)
330 - (1/$n + 1/$m - 1/($n+$m))/6
331 )/2);
332 $delta = (_subfprob($n, $m, $x) - $p) / $z;
333 $x += $delta;
334 } while (abs($delta)>3e-4);
335 return $x;
336 }
337
338 sub _subchisqr {
339 my ($n, $p) = @_;
340 my $x;
341
342 if (($p > 1) || ($p <= 0)) {
343 die "Invalid p: $p\n";
344 } elsif ($p == 1){
345 $x = 0;
346 } elsif ($n == 1) {
347 $x = _subu($p / 2) ** 2;
348 } elsif ($n == 2) {
349 $x = -2 * log($p);
350 } else {
351 my $u = _subu($p);
352 my $u2 = $u * $u;
353
354 $x = max(0, $n + sqrt(2 * $n) * $u
355 + 2/3 * ($u2 - 1)
356 + $u * ($u2 - 7) / 9 / sqrt(2 * $n)
357 - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16));
358
359 if ($n <= 100) {
360 my ($x0, $p1, $z);
361 do {
362 $x0 = $x;
363 if ($x < 0) {
364 $p1 = 1;
365 } elsif ($n>100) {
366 $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n))
367 / sqrt(2/9/$n));
368 } elsif ($x>400) {
369 $p1 = 0;
370 } else {
371 my ($i0, $a);
372 if (($n % 2) != 0) {
373 $p1 = 2 * _subuprob(sqrt($x));
374 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
375 $i0 = 1;
376 } else {
377 $p1 = $a = exp(-$x/2);
378 $i0 = 2;
379 }
380
381 for (my $i = $i0; $i <= $n-2; $i += 2) {
382 $a *= $x / $i;
383 $p1 += $a;
384 }
385 }
386 $z = exp((($n-1) * log($x/$n) - log(4*PI*$x)
387 + $n - $x - 1/$n/6) / 2);
388 $x += ($p1 - $p) / $z;
389 $x = sprintf("%.5f", $x);
390 } while (($n < 31) && (abs($x0 - $x) > 1e-4));
391 }
392 }
393 return $x;
394 }
395
396 sub log10 {
397 my $n = shift;
398 return log($n) / log(10);
399 }
400
401 sub max {
402 my $max = shift;
403 my $next;
404 while (@_) {
405 $next = shift;
406 $max = $next if ($next > $max);
407 }
408 return $max;
409 }
410
411 sub min {
412 my $min = shift;
413 my $next;
414 while (@_) {
415 $next = shift;
416 $min = $next if ($next < $min);
417 }
418 return $min;
419 }
420
421 sub precision {
422 my ($x) = @_;
423 return abs int(log10(abs $x) - SIGNIFICANT);
424 }
425
426 sub precision_string {
427 my ($x) = @_;
428 if ($x) {
429 return sprintf "%." . precision($x) . "f", $x;
430 } else {
431 return "0";
432 }
433 }
434
435
436 # Autoload methods go after =cut, and are processed by the autosplit program.
437
438 1;
439 __END__
440 # Below is the stub of documentation for your module. You better edit it!
441
442 =head1 NAME
443
444 Statistics::Distributions - Perl module for calculating critical values and upper probabilities of common statistical distributions
445
446 =head1 SYNOPSIS
447
448 use Statistics::Distributions;
449
450 $chis=Statistics::Distributions::chisqrdistr (2,.05);
451 print "Chi-squared-crit (2 degrees of freedom, 95th percentile "
452 ."= 0.05 level) = $chis\n";
453
454 $u=Statistics::Distributions::udistr (.05);
455 print "u-crit (95th percentile = 0.05 level) = $u\n";
456
457 $t=Statistics::Distributions::tdistr (1,.005);
458 print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) "
459 ."= $t\n";
460
461 $f=Statistics::Distributions::fdistr (1,3,.01);
462 print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom "
463 ."in denominator, 99th percentile = 0.01 level) = $f\n";
464
465 $uprob=Statistics::Distributions::uprob (-0.85);
466 print "upper probability of the u distribution (u = -0.85): Q(u) "
467 ."= 1-G(u) = $uprob\n";
468
469 $chisprob=Statistics::Distributions::chisqrprob (3,6.25);
470 print "upper probability of the chi-square distribution (3 degrees "
471 ."of freedom, chi-squared = 6.25): Q = 1-G = $chisprob\n";
472
473 $tprob=Statistics::Distributions::tprob (3,6.251);
474 print "upper probability of the t distribution (3 degrees of "
475 ."freedom, t = 6.251): Q = 1-G = $tprob\n";
476
477 $fprob=Statistics::Distributions::fprob (3,5,.625);
478 print "upper probability of the F distribution (3 degrees of freedom "
479 ."in numerator, 5 degrees of freedom in denominator, F = 6.25): "
480 ."Q = 1-G = $fprob\n";
481
482 =head1 DESCRIPTION
483
484 This Perl module calculates percentage points (5 significant digits) of the u (standard normal) distribution, the student's t distribution, the chi-square distribution and the F distribution. It can also calculate the upper probability (5 significant digits) of the u (standard normal), the chi-square, the t and the F distribution.
485 These critical values are needed to perform statistical tests, like the u test, the t test, the F test and the chi-squared test, and to calculate confidence intervals.
486
487 If you are interested in more precise algorithms you could look at:
488 StatLib: http://lib.stat.cmu.edu/apstat/ ;
489 Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985)
490
491 =head1 BUGS
492
493 This final version 1.02 has been released after more than one year without a bug report on the previous version 0.07.
494 Nevertheless, if you find any bugs or oddities, please do inform the author.
495
496 =head1 INSTALLATION
497
498 See perlmodinstall for information and options on installing Perl modules.
499
500 =head1 AVAILABILITY
501
502 The latest version of this module is available from the Distribution Perl Archive Network (CPAN). Please visit http://www.cpan.org/ to find a CPAN site near you or see http://www.cpan.org/authors/id/M/MI/MIKEK/ .
503
504 =head1 AUTHOR
505
506 Michael Kospach <mike.perl@gmx.at>
507
508 Nice formating, simplification and bug repair by Matthias Trautner Kromann <mtk@id.cbs.dk>
509
510 =head1 COPYRIGHT
511
512 Copyright 2003 Michael Kospach. All rights reserved.
513
514 This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
515
516 =head1 SEE ALSO
517
518 Statistics::ChiSquare, Statistics::Table::t, Statistics::Table::F, perl(1).
519
520 =cut
521
522
523
524
525
526
527
528
529