Mercurial > repos > dereeper > pgap
comparison PGAP-1.2.1/Statistics/Distributions.pm @ 0:83e62a1aeeeb draft
Uploaded
author | dereeper |
---|---|
date | Thu, 24 Jun 2021 13:51:52 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:83e62a1aeeeb |
---|---|
1 package Statistics::Distributions; | |
2 | |
3 use strict; | |
4 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); | |
5 use constant PI => 3.1415926536; | |
6 use constant SIGNIFICANT => 5; # number of significant digits to be returned | |
7 | |
8 require Exporter; | |
9 | |
10 @ISA = qw(Exporter AutoLoader); | |
11 # Items to export into callers namespace by default. Note: do not export | |
12 # names by default without a very good reason. Use EXPORT_OK instead. | |
13 # Do not simply export all your public functions/methods/constants. | |
14 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob); | |
15 $VERSION = '1.02'; | |
16 | |
17 # Preloaded methods go here. | |
18 | |
19 sub chisqrdistr { # Percentage points X^2(x^2,n) | |
20 my ($n, $p) = @_; | |
21 if ($n <= 0 || abs($n) - abs(int($n)) != 0) { | |
22 die "Invalid n: $n\n"; # degree of freedom | |
23 } | |
24 if ($p <= 0 || $p > 1) { | |
25 die "Invalid p: $p\n"; | |
26 } | |
27 return precision_string(_subchisqr($n, $p)); | |
28 } | |
29 | |
30 sub udistr { # Percentage points N(0,1^2) | |
31 my ($p) = (@_); | |
32 if ($p > 1 || $p <= 0) { | |
33 die "Invalid p: $p\n"; | |
34 } | |
35 return precision_string(_subu($p)); | |
36 } | |
37 | |
38 sub tdistr { # Percentage points t(x,n) | |
39 my ($n, $p) = @_; | |
40 if ($n <= 0 || abs($n) - abs(int($n)) != 0) { | |
41 die "Invalid n: $n\n"; | |
42 } | |
43 if ($p <= 0 || $p >= 1) { | |
44 die "Invalid p: $p\n"; | |
45 } | |
46 return precision_string(_subt($n, $p)); | |
47 } | |
48 | |
49 sub fdistr { # Percentage points F(x,n1,n2) | |
50 my ($n, $m, $p) = @_; | |
51 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) { | |
52 die "Invalid n: $n\n"; # first degree of freedom | |
53 } | |
54 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) { | |
55 die "Invalid m: $m\n"; # second degree of freedom | |
56 } | |
57 if (($p<=0) || ($p>1)) { | |
58 die "Invalid p: $p\n"; | |
59 } | |
60 return precision_string(_subf($n, $m, $p)); | |
61 } | |
62 | |
63 sub uprob { # Upper probability N(0,1^2) | |
64 my ($x) = @_; | |
65 return precision_string(_subuprob($x)); | |
66 } | |
67 | |
68 sub chisqrprob { # Upper probability X^2(x^2,n) | |
69 my ($n,$x) = @_; | |
70 if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) { | |
71 die "Invalid n: $n\n"; # degree of freedom | |
72 } | |
73 return precision_string(_subchisqrprob($n, $x)); | |
74 } | |
75 | |
76 sub tprob { # Upper probability t(x,n) | |
77 my ($n, $x) = @_; | |
78 if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) { | |
79 die "Invalid n: $n\n"; # degree of freedom | |
80 } | |
81 return precision_string(_subtprob($n, $x)); | |
82 } | |
83 | |
84 sub fprob { # Upper probability F(x,n1,n2) | |
85 my ($n, $m, $x) = @_; | |
86 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) { | |
87 die "Invalid n: $n\n"; # first degree of freedom | |
88 } | |
89 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) { | |
90 die "Invalid m: $m\n"; # second degree of freedom | |
91 } | |
92 return precision_string(_subfprob($n, $m, $x)); | |
93 } | |
94 | |
95 | |
96 sub _subfprob { | |
97 my ($n, $m, $x) = @_; | |
98 my $p; | |
99 | |
100 if ($x<=0) { | |
101 $p=1; | |
102 } elsif ($m % 2 == 0) { | |
103 my $z = $m / ($m + $n * $x); | |
104 my $a = 1; | |
105 for (my $i = $m - 2; $i >= 2; $i -= 2) { | |
106 $a = 1 + ($n + $i - 2) / $i * $z * $a; | |
107 } | |
108 $p = 1 - ((1 - $z) ** ($n / 2) * $a); | |
109 } elsif ($n % 2 == 0) { | |
110 my $z = $n * $x / ($m + $n * $x); | |
111 my $a = 1; | |
112 for (my $i = $n - 2; $i >= 2; $i -= 2) { | |
113 $a = 1 + ($m + $i - 2) / $i * $z * $a; | |
114 } | |
115 $p = (1 - $z) ** ($m / 2) * $a; | |
116 } else { | |
117 my $y = atan2(sqrt($n * $x / $m), 1); | |
118 my $z = sin($y) ** 2; | |
119 my $a = ($n == 1) ? 0 : 1; | |
120 for (my $i = $n - 2; $i >= 3; $i -= 2) { | |
121 $a = 1 + ($m + $i - 2) / $i * $z * $a; | |
122 } | |
123 my $b = PI; | |
124 for (my $i = 2; $i <= $m - 1; $i += 2) { | |
125 $b *= ($i - 1) / $i; | |
126 } | |
127 my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a; | |
128 | |
129 $z = cos($y) ** 2; | |
130 $a = ($m == 1) ? 0 : 1; | |
131 for (my $i = $m-2; $i >= 3; $i -= 2) { | |
132 $a = 1 + ($i - 1) / $i * $z * $a; | |
133 } | |
134 $p = max(0, $p1 + 1 - 2 * $y / PI | |
135 - 2 / PI * sin($y) * cos($y) * $a); | |
136 } | |
137 return $p; | |
138 } | |
139 | |
140 | |
141 sub _subchisqrprob { | |
142 my ($n,$x) = @_; | |
143 my $p; | |
144 | |
145 if ($x <= 0) { | |
146 $p = 1; | |
147 } elsif ($n > 100) { | |
148 $p = _subuprob((($x / $n) ** (1/3) | |
149 - (1 - 2/9/$n)) / sqrt(2/9/$n)); | |
150 } elsif ($x > 400) { | |
151 $p = 0; | |
152 } else { | |
153 my ($a, $i, $i1); | |
154 if (($n % 2) != 0) { | |
155 $p = 2 * _subuprob(sqrt($x)); | |
156 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x); | |
157 $i1 = 1; | |
158 } else { | |
159 $p = $a = exp(-$x/2); | |
160 $i1 = 2; | |
161 } | |
162 | |
163 for ($i = $i1; $i <= ($n-2); $i += 2) { | |
164 $a *= $x / $i; | |
165 $p += $a; | |
166 } | |
167 } | |
168 return $p; | |
169 } | |
170 | |
171 sub _subu { | |
172 my ($p) = @_; | |
173 my $y = -log(4 * $p * (1 - $p)); | |
174 my $x = sqrt( | |
175 $y * (1.570796288 | |
176 + $y * (.03706987906 | |
177 + $y * (-.8364353589E-3 | |
178 + $y *(-.2250947176E-3 | |
179 + $y * (.6841218299E-5 | |
180 + $y * (0.5824238515E-5 | |
181 + $y * (-.104527497E-5 | |
182 + $y * (.8360937017E-7 | |
183 + $y * (-.3231081277E-8 | |
184 + $y * (.3657763036E-10 | |
185 + $y *.6936233982E-12))))))))))); | |
186 $x = -$x if ($p>.5); | |
187 return $x; | |
188 } | |
189 | |
190 sub _subuprob { | |
191 my ($x) = @_; | |
192 my $p = 0; # if ($absx > 100) | |
193 my $absx = abs($x); | |
194 | |
195 if ($absx < 1.9) { | |
196 $p = (1 + | |
197 $absx * (.049867347 | |
198 + $absx * (.0211410061 | |
199 + $absx * (.0032776263 | |
200 + $absx * (.0000380036 | |
201 + $absx * (.0000488906 | |
202 + $absx * .000005383)))))) ** -16/2; | |
203 } elsif ($absx <= 100) { | |
204 for (my $i = 18; $i >= 1; $i--) { | |
205 $p = $i / ($absx + $p); | |
206 } | |
207 $p = exp(-.5 * $absx * $absx) | |
208 / sqrt(2 * PI) / ($absx + $p); | |
209 } | |
210 | |
211 $p = 1 - $p if ($x<0); | |
212 return $p; | |
213 } | |
214 | |
215 | |
216 sub _subt { | |
217 my ($n, $p) = @_; | |
218 | |
219 if ($p >= 1 || $p <= 0) { | |
220 die "Invalid p: $p\n"; | |
221 } | |
222 | |
223 if ($p == 0.5) { | |
224 return 0; | |
225 } elsif ($p < 0.5) { | |
226 return - _subt($n, 1 - $p); | |
227 } | |
228 | |
229 my $u = _subu($p); | |
230 my $u2 = $u ** 2; | |
231 | |
232 my $a = ($u2 + 1) / 4; | |
233 my $b = ((5 * $u2 + 16) * $u2 + 3) / 96; | |
234 my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384; | |
235 my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945) | |
236 / 92160; | |
237 my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2 | |
238 + 17955) / 368640; | |
239 | |
240 my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n); | |
241 | |
242 if ($n <= log10($p) ** 2 + 3) { | |
243 my $round; | |
244 do { | |
245 my $p1 = _subtprob($n, $x); | |
246 my $n1 = $n + 1; | |
247 my $delta = ($p1 - $p) | |
248 / exp(($n1 * log($n1 / ($n + $x * $x)) | |
249 + log($n/$n1/2/PI) - 1 | |
250 + (1/$n1 - 1/$n) / 6) / 2); | |
251 $x += $delta; | |
252 $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta); | |
253 } while (($x) && ($round != 0)); | |
254 } | |
255 return $x; | |
256 } | |
257 | |
258 sub _subtprob { | |
259 my ($n, $x) = @_; | |
260 | |
261 my ($a,$b); | |
262 my $w = atan2($x / sqrt($n), 1); | |
263 my $z = cos($w) ** 2; | |
264 my $y = 1; | |
265 | |
266 for (my $i = $n-2; $i >= 2; $i -= 2) { | |
267 $y = 1 + ($i-1) / $i * $z * $y; | |
268 } | |
269 | |
270 if ($n % 2 == 0) { | |
271 $a = sin($w)/2; | |
272 $b = .5; | |
273 } else { | |
274 $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI; | |
275 $b= .5 + $w/PI; | |
276 } | |
277 return max(0, 1 - $b - $a * $y); | |
278 } | |
279 | |
280 sub _subf { | |
281 my ($n, $m, $p) = @_; | |
282 my $x; | |
283 | |
284 if ($p >= 1 || $p <= 0) { | |
285 die "Invalid p: $p\n"; | |
286 } | |
287 | |
288 if ($p == 1) { | |
289 $x = 0; | |
290 } elsif ($m == 1) { | |
291 $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2); | |
292 } elsif ($n == 1) { | |
293 $x = _subt($m, $p/2) ** 2; | |
294 } elsif ($m == 2) { | |
295 my $u = _subchisqr($m, 1 - $p); | |
296 my $a = $m - 2; | |
297 $x = 1 / ($u / $m * (1 + | |
298 (($u - $a) / 2 + | |
299 (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 + | |
300 (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u | |
301 - $a * $a * (9 * $m - 6) | |
302 )/48/$n | |
303 )/$n | |
304 )/$n)); | |
305 } elsif ($n > $m) { | |
306 $x = 1 / _subf2($m, $n, 1 - $p) | |
307 } else { | |
308 $x = _subf2($n, $m, $p) | |
309 } | |
310 return $x; | |
311 } | |
312 | |
313 sub _subf2 { | |
314 my ($n, $m, $p) = @_; | |
315 my $u = _subchisqr($n, $p); | |
316 my $n2 = $n - 2; | |
317 my $x = $u / $n * | |
318 (1 + | |
319 (($u - $n2) / 2 + | |
320 (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 + | |
321 (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u | |
322 - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m); | |
323 my $delta; | |
324 do { | |
325 my $z = exp( | |
326 (($n+$m) * log(($n+$m) / ($n * $x + $m)) | |
327 + ($n - 2) * log($x) | |
328 + log($n * $m / ($n+$m)) | |
329 - log(4 * PI) | |
330 - (1/$n + 1/$m - 1/($n+$m))/6 | |
331 )/2); | |
332 $delta = (_subfprob($n, $m, $x) - $p) / $z; | |
333 $x += $delta; | |
334 } while (abs($delta)>3e-4); | |
335 return $x; | |
336 } | |
337 | |
338 sub _subchisqr { | |
339 my ($n, $p) = @_; | |
340 my $x; | |
341 | |
342 if (($p > 1) || ($p <= 0)) { | |
343 die "Invalid p: $p\n"; | |
344 } elsif ($p == 1){ | |
345 $x = 0; | |
346 } elsif ($n == 1) { | |
347 $x = _subu($p / 2) ** 2; | |
348 } elsif ($n == 2) { | |
349 $x = -2 * log($p); | |
350 } else { | |
351 my $u = _subu($p); | |
352 my $u2 = $u * $u; | |
353 | |
354 $x = max(0, $n + sqrt(2 * $n) * $u | |
355 + 2/3 * ($u2 - 1) | |
356 + $u * ($u2 - 7) / 9 / sqrt(2 * $n) | |
357 - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16)); | |
358 | |
359 if ($n <= 100) { | |
360 my ($x0, $p1, $z); | |
361 do { | |
362 $x0 = $x; | |
363 if ($x < 0) { | |
364 $p1 = 1; | |
365 } elsif ($n>100) { | |
366 $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n)) | |
367 / sqrt(2/9/$n)); | |
368 } elsif ($x>400) { | |
369 $p1 = 0; | |
370 } else { | |
371 my ($i0, $a); | |
372 if (($n % 2) != 0) { | |
373 $p1 = 2 * _subuprob(sqrt($x)); | |
374 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x); | |
375 $i0 = 1; | |
376 } else { | |
377 $p1 = $a = exp(-$x/2); | |
378 $i0 = 2; | |
379 } | |
380 | |
381 for (my $i = $i0; $i <= $n-2; $i += 2) { | |
382 $a *= $x / $i; | |
383 $p1 += $a; | |
384 } | |
385 } | |
386 $z = exp((($n-1) * log($x/$n) - log(4*PI*$x) | |
387 + $n - $x - 1/$n/6) / 2); | |
388 $x += ($p1 - $p) / $z; | |
389 $x = sprintf("%.5f", $x); | |
390 } while (($n < 31) && (abs($x0 - $x) > 1e-4)); | |
391 } | |
392 } | |
393 return $x; | |
394 } | |
395 | |
396 sub log10 { | |
397 my $n = shift; | |
398 return log($n) / log(10); | |
399 } | |
400 | |
401 sub max { | |
402 my $max = shift; | |
403 my $next; | |
404 while (@_) { | |
405 $next = shift; | |
406 $max = $next if ($next > $max); | |
407 } | |
408 return $max; | |
409 } | |
410 | |
411 sub min { | |
412 my $min = shift; | |
413 my $next; | |
414 while (@_) { | |
415 $next = shift; | |
416 $min = $next if ($next < $min); | |
417 } | |
418 return $min; | |
419 } | |
420 | |
421 sub precision { | |
422 my ($x) = @_; | |
423 return abs int(log10(abs $x) - SIGNIFICANT); | |
424 } | |
425 | |
426 sub precision_string { | |
427 my ($x) = @_; | |
428 if ($x) { | |
429 return sprintf "%." . precision($x) . "f", $x; | |
430 } else { | |
431 return "0"; | |
432 } | |
433 } | |
434 | |
435 | |
436 # Autoload methods go after =cut, and are processed by the autosplit program. | |
437 | |
438 1; | |
439 __END__ | |
440 # Below is the stub of documentation for your module. You better edit it! | |
441 | |
442 =head1 NAME | |
443 | |
444 Statistics::Distributions - Perl module for calculating critical values and upper probabilities of common statistical distributions | |
445 | |
446 =head1 SYNOPSIS | |
447 | |
448 use Statistics::Distributions; | |
449 | |
450 $chis=Statistics::Distributions::chisqrdistr (2,.05); | |
451 print "Chi-squared-crit (2 degrees of freedom, 95th percentile " | |
452 ."= 0.05 level) = $chis\n"; | |
453 | |
454 $u=Statistics::Distributions::udistr (.05); | |
455 print "u-crit (95th percentile = 0.05 level) = $u\n"; | |
456 | |
457 $t=Statistics::Distributions::tdistr (1,.005); | |
458 print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) " | |
459 ."= $t\n"; | |
460 | |
461 $f=Statistics::Distributions::fdistr (1,3,.01); | |
462 print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom " | |
463 ."in denominator, 99th percentile = 0.01 level) = $f\n"; | |
464 | |
465 $uprob=Statistics::Distributions::uprob (-0.85); | |
466 print "upper probability of the u distribution (u = -0.85): Q(u) " | |
467 ."= 1-G(u) = $uprob\n"; | |
468 | |
469 $chisprob=Statistics::Distributions::chisqrprob (3,6.25); | |
470 print "upper probability of the chi-square distribution (3 degrees " | |
471 ."of freedom, chi-squared = 6.25): Q = 1-G = $chisprob\n"; | |
472 | |
473 $tprob=Statistics::Distributions::tprob (3,6.251); | |
474 print "upper probability of the t distribution (3 degrees of " | |
475 ."freedom, t = 6.251): Q = 1-G = $tprob\n"; | |
476 | |
477 $fprob=Statistics::Distributions::fprob (3,5,.625); | |
478 print "upper probability of the F distribution (3 degrees of freedom " | |
479 ."in numerator, 5 degrees of freedom in denominator, F = 6.25): " | |
480 ."Q = 1-G = $fprob\n"; | |
481 | |
482 =head1 DESCRIPTION | |
483 | |
484 This Perl module calculates percentage points (5 significant digits) of the u (standard normal) distribution, the student's t distribution, the chi-square distribution and the F distribution. It can also calculate the upper probability (5 significant digits) of the u (standard normal), the chi-square, the t and the F distribution. | |
485 These critical values are needed to perform statistical tests, like the u test, the t test, the F test and the chi-squared test, and to calculate confidence intervals. | |
486 | |
487 If you are interested in more precise algorithms you could look at: | |
488 StatLib: http://lib.stat.cmu.edu/apstat/ ; | |
489 Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985) | |
490 | |
491 =head1 BUGS | |
492 | |
493 This final version 1.02 has been released after more than one year without a bug report on the previous version 0.07. | |
494 Nevertheless, if you find any bugs or oddities, please do inform the author. | |
495 | |
496 =head1 INSTALLATION | |
497 | |
498 See perlmodinstall for information and options on installing Perl modules. | |
499 | |
500 =head1 AVAILABILITY | |
501 | |
502 The latest version of this module is available from the Distribution Perl Archive Network (CPAN). Please visit http://www.cpan.org/ to find a CPAN site near you or see http://www.cpan.org/authors/id/M/MI/MIKEK/ . | |
503 | |
504 =head1 AUTHOR | |
505 | |
506 Michael Kospach <mike.perl@gmx.at> | |
507 | |
508 Nice formating, simplification and bug repair by Matthias Trautner Kromann <mtk@id.cbs.dk> | |
509 | |
510 =head1 COPYRIGHT | |
511 | |
512 Copyright 2003 Michael Kospach. All rights reserved. | |
513 | |
514 This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
515 | |
516 =head1 SEE ALSO | |
517 | |
518 Statistics::ChiSquare, Statistics::Table::t, Statistics::Table::F, perl(1). | |
519 | |
520 =cut | |
521 | |
522 | |
523 | |
524 | |
525 | |
526 | |
527 | |
528 | |
529 |