PGAP-1.2.1/Statistics/LineFit.pm

package Statistics::LineFit;
use strict;
use Carp qw(carp);
BEGIN {
    use Exporter ();
    use vars qw ($AUTHOR $VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
    $AUTHOR = 'Richard Anderson <cpan(AT)richardanderson(DOT)org>';
    @EXPORT = @EXPORT_OK = qw();
    %EXPORT_TAGS = ();
    @ISA = qw(Exporter);
    $VERSION = 0.06;
}

sub new {
#
# Purpose: Create a new Statistics::LineFit object
#
    my ($caller, $validate, $hush) = @_;
    my $self = { doneRegress => 0,
                 gotData => 0,
                 hush => defined $hush ? $hush : 0,
                 validate => defined $validate ? $validate : 0,
               };
    bless $self, ref($caller) || $caller;
    return $self;
}

sub coefficients {
#
# Purpose: Return the slope and intercept from least squares line fit
#
    my $self = shift;
    unless (defined $self->{intercept} and defined $self->{slope}) {
        $self->regress() or return;
    }
    return ($self->{intercept}, $self->{slope});
}

sub computeSums {
#
# Purpose: Compute sum of x, y, x**2, y**2 and x*y (private method)
#
    my $self = shift;
    my ($sumX, $sumY, $sumXX, $sumYY, $sumXY) = (0, 0, 0, 0, 0);
    if (defined $self->{weight}) {
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            $sumX += $self->{weight}[$i] * $self->{x}[$i];
            $sumY += $self->{weight}[$i] * $self->{y}[$i];
            $sumXX += $self->{weight}[$i] * $self->{x}[$i] ** 2;
            $sumYY += $self->{weight}[$i] * $self->{y}[$i] ** 2;
            $sumXY += $self->{weight}[$i] * $self->{x}[$i]
                * $self->{y}[$i];
        }
    } else {
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            $sumX += $self->{x}[$i];
            $sumY += $self->{y}[$i];
            $sumXX += $self->{x}[$i] ** 2;
            $sumYY += $self->{y}[$i] ** 2;
            $sumXY += $self->{x}[$i] * $self->{y}[$i];
        }
    }
    return ($sumX, $sumY, $sumXX, $sumYY, $sumXY);
}

sub durbinWatson {
#
# Purpose: Return the Durbin-Watson statistic
#
    my $self = shift;
    unless (defined $self->{durbinWatson}) {
        $self->regress() or return;
        my $sumErrDiff = 0;
        my $errorTMinus1 = $self->{y}[0] - ($self->{intercept} + $self->{slope}
            * $self->{x}[0]);
        for (my $i = 1; $i < $self->{numXY}; ++$i) {
            my $error = $self->{y}[$i] - ($self->{intercept} + $self->{slope}
                * $self->{x}[$i]);
            $sumErrDiff += ($error - $errorTMinus1) ** 2;
            $errorTMinus1 = $error;
        }
        $self->{durbinWatson} = $self->sumSqErrors() > 0 ?
            $sumErrDiff / $self->sumSqErrors() : 0;
    }
    return $self->{durbinWatson};
}

sub meanSqError {
#
# Purpose: Return the mean squared error
#
    my $self = shift;
    unless (defined $self->{meanSqError}) {
        $self->regress() or return;
        $self->{meanSqError} = $self->sumSqErrors() / $self->{numXY};
    }
    return $self->{meanSqError};
}

sub predictedYs {
#
# Purpose: Return the predicted y values
#
    my $self = shift;
    unless (defined $self->{predictedYs}) {
        $self->regress() or return;
        $self->{predictedYs} = [];
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            $self->{predictedYs}[$i] = $self->{intercept}
                + $self->{slope} * $self->{x}[$i];
        }
    }
    return @{$self->{predictedYs}};
}

sub regress {
#
# Purpose: Do weighted or unweighted least squares 2-D line fit (if needed)
#
# Description:
# The equations below apply to both the weighted and unweighted fit: the
# weights are normalized in setWeights(), so the sum of the weights is
# equal to numXY.
#
    my $self = shift;
    return $self->{regressOK} if $self->{doneRegress};
    unless ($self->{gotData}) {
        carp "No valid data input - can't do regression" unless $self->{hush};
        return 0;
    }
    my ($sumX, $sumY, $sumYY, $sumXY);
    ($sumX, $sumY, $self->{sumXX}, $sumYY, $sumXY) = $self->computeSums();
    $self->{sumSqDevX} = $self->{sumXX} - $sumX ** 2 / $self->{numXY};
    if ($self->{sumSqDevX} != 0) {
        $self->{sumSqDevY} = $sumYY - $sumY ** 2 / $self->{numXY};
        $self->{sumSqDevXY} = $sumXY - $sumX * $sumY / $self->{numXY};
        $self->{slope} = $self->{sumSqDevXY} / $self->{sumSqDevX};
        $self->{intercept} = ($sumY - $self->{slope} * $sumX) / $self->{numXY};
        $self->{regressOK} = 1;
    } else {
        carp "Can't fit line when x values are all equal" unless $self->{hush};
        $self->{sumXX} = $self->{sumSqDevX} = undef;
        $self->{regressOK} = 0;
    }
    $self->{doneRegress} = 1;
    return $self->{regressOK};
}

sub residuals {
#
# Purpose: Return the observed y values minus the predicted y values
#
    my $self = shift;
    unless (defined $self->{residuals}) {
        $self->regress() or return;
        $self->{residuals} = [];
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            $self->{residuals}[$i] = $self->{y}[$i] - ($self->{intercept}
                + $self->{slope} * $self->{x}[$i]);
        }
    }
    return @{$self->{residuals}};
}

sub rSquared {
#
# Purpose: Return the correlation coefficient
#
    my $self = shift;
    unless (defined $self->{rSquared}) {
        $self->regress() or return;
        my $denom = $self->{sumSqDevX} * $self->{sumSqDevY};
        $self->{rSquared} = $denom != 0 ? $self->{sumSqDevXY} ** 2 / $denom : 1;
    }
    return $self->{rSquared};
}

sub setData {
#
# Purpose: Initialize (x,y) values and optional weights
#
    my ($self, $x, $y, $weights) = @_;
    $self->{doneRegress} = 0;
    $self->{x} = $self->{y} = $self->{numXY} = $self->{weight}
        = $self->{intercept} = $self->{slope} = $self->{rSquared}
        = $self->{sigma} = $self->{durbinWatson} = $self->{meanSqError}
        = $self->{sumSqErrors} = $self->{tStatInt} = $self->{tStatSlope}
        = $self->{predictedYs} = $self->{residuals} = $self->{sumXX}
        = $self->{sumSqDevX} = $self->{sumSqDevY} = $self->{sumSqDevXY}
        = undef;
    if (@$x < 2) {
        carp "Must input more than one data point!" unless $self->{hush};
        return 0;
    }
    $self->{numXY} = @$x;
    if (ref $x->[0]) {
        # 2-D calling form: $x is an array of arrayrefs and the second
        # argument holds the (optional) weights
        $self->setWeights($y) or return 0;
        $self->{x} = [ ];
        $self->{y} = [ ];
        foreach my $xy (@$x) {
            push @{$self->{x}}, $xy->[0];
            push @{$self->{y}}, $xy->[1];
        }
    } else {
        if (@$x != @$y) {
            carp "Length of x and y arrays must be equal!" unless $self->{hush};
            return 0;
        }
        $self->setWeights($weights) or return 0;
        $self->{x} = [ @$x ];
        $self->{y} = [ @$y ];
    }
    if ($self->{validate}) {
        unless ($self->validData()) {
            $self->{x} = $self->{y} = $self->{weight} = $self->{numXY} = undef;
            return 0;
        }
    }
    $self->{gotData} = 1;
    return 1;
}

sub setWeights {
#
# Purpose: Normalize and initialize line fit weighting factors (private method)
#
    my ($self, $weights) = @_;
    return 1 unless defined $weights;
    if (@$weights != $self->{numXY}) {
        carp "Length of weight array must equal length of data array!"
            unless $self->{hush};
        return 0;
    }
    if ($self->{validate}) { $self->validWeights($weights) or return 0 }
    my $sumW = my $numNonZero = 0;
    foreach my $weight (@$weights) {
        if ($weight < 0) {
            carp "Weights must be non-negative numbers!" unless $self->{hush};
            return 0;
        }
        $sumW += $weight;
        if ($weight != 0) { ++$numNonZero }
    }
    if ($numNonZero < 2) {
        carp "At least two weights must be nonzero!" unless $self->{hush};
        return 0;
    }
    # Normalize a copy of the weights so the caller's array is not modified
    my @weights = @$weights;
    my $factor = @weights / $sumW;
    foreach my $weight (@weights) { $weight *= $factor }
    $self->{weight} = [ @weights ];
    return 1;
}

sub sigma {
#
# Purpose: Return the estimated homoscedastic standard deviation of the
# error term
#
    my $self = shift;
    unless (defined $self->{sigma}) {
        $self->regress() or return;
        $self->{sigma} = $self->{numXY} > 2 ?
            sqrt($self->sumSqErrors() / ($self->{numXY} - 2)) : 0;
    }
    return $self->{sigma};
}

sub sumSqErrors {
#
# Purpose: Return the sum of the squared errors (private method)
#
    my $self = shift;
    unless (defined $self->{sumSqErrors}) {
        $self->regress() or return;
        $self->{sumSqErrors} = $self->{sumSqDevY} - $self->{sumSqDevX}
            * $self->{slope} ** 2;
        if ($self->{sumSqErrors} < 0) { $self->{sumSqErrors} = 0 }
    }
    return $self->{sumSqErrors};
}

sub tStatistics {
#
# Purpose: Return the T statistics
#
    my $self = shift;
    unless (defined $self->{tStatInt} and defined $self->{tStatSlope}) {
        $self->regress() or return;
        my $biasEstimateInt = $self->sigma() * sqrt($self->{sumXX}
            / ($self->{sumSqDevX} * $self->{numXY}));
        $self->{tStatInt} = $biasEstimateInt != 0 ?
            $self->{intercept} / $biasEstimateInt : 0;
        my $biasEstimateSlope = $self->sigma() / sqrt($self->{sumSqDevX});
        $self->{tStatSlope} = $biasEstimateSlope != 0 ?
            $self->{slope} / $biasEstimateSlope : 0;
    }
    return ($self->{tStatInt}, $self->{tStatSlope});
}

sub validData {
#
# Purpose: Verify that the input x-y data are numeric (private method)
#
    my $self = shift;
    for (my $i = 0; $i < $self->{numXY}; ++$i) {
        if (not defined $self->{x}[$i]) {
            carp "Input x[$i] is not defined" unless $self->{hush};
            return 0;
        }
        # Accept integer, fixed-point or scientific notation (e.g. -1.2e+3)
        if ($self->{x}[$i] !~
            /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
        {
            carp "Input x[$i] is not a number: $self->{x}[$i]"
                unless $self->{hush};
            return 0;
        }
        if (not defined $self->{y}[$i]) {
            carp "Input y[$i] is not defined" unless $self->{hush};
            return 0;
        }
        if ($self->{y}[$i] !~
            /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
        {
            carp "Input y[$i] is not a number: $self->{y}[$i]"
                unless $self->{hush};
            return 0;
        }
    }
    return 1;
}

sub validWeights {
#
# Purpose: Verify that the input weights are numeric (private method)
#
    my ($self, $weights) = @_;
    for (my $i = 0; $i < @$weights; ++$i) {
        if (not defined $weights->[$i]) {
            carp "Input weights[$i] is not defined" unless $self->{hush};
            return 0;
        }
        if ($weights->[$i]
            !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
        {
            carp "Input weights[$i] is not a number: $weights->[$i]"
                unless $self->{hush};
            return 0;
        }
    }
    return 1;
}

sub varianceOfEstimates {
#
# Purpose: Return the variances in the estimates of the intercept and slope
#
    my $self = shift;
    unless (defined $self->{intercept} and defined $self->{slope}) {
        $self->regress() or return;
    }
    my @predictedYs = $self->predictedYs();
    my ($s, $sx, $sxx) = (0, 0, 0);
    if (defined $self->{weight}) {
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2;
            next if 0 == $variance;
            $s += 1.0 / $variance;
            $sx += $self->{weight}[$i] * $self->{x}[$i] / $variance;
            $sxx += $self->{weight}[$i] * $self->{x}[$i] ** 2 / $variance;
        }
    } else {
        for (my $i = 0; $i < $self->{numXY}; ++$i) {
            my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2;
            next if 0 == $variance;
            $s += 1.0 / $variance;
            $sx += $self->{x}[$i] / $variance;
            $sxx += $self->{x}[$i] ** 2 / $variance;
        }
    }
    my $denominator = ($s * $sxx - $sx ** 2);
    if (0 == $denominator) {
        return;
    } else {
        return ($sxx / $denominator, $s / $denominator);
    }
}

1;

__END__

=head1 NAME

Statistics::LineFit - Least squares line fit, weighted or unweighted

=head1 SYNOPSIS

 use Statistics::LineFit;
 $lineFit = Statistics::LineFit->new();
 $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
 ($intercept, $slope) = $lineFit->coefficients();
 defined $intercept or die "Can't fit line if x values are all equal";
 $rSquared = $lineFit->rSquared();
 $meanSquaredError = $lineFit->meanSqError();
 $durbinWatson = $lineFit->durbinWatson();
 $sigma = $lineFit->sigma();
 ($tStatIntercept, $tStatSlope) = $lineFit->tStatistics();
 @predictedYs = $lineFit->predictedYs();
 @residuals = $lineFit->residuals();
 ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();

=head1 DESCRIPTION

The Statistics::LineFit module does weighted or unweighted least-squares
line fitting to two-dimensional data (y = a + b * x). (This is also called
linear regression.) In addition to the slope and y-intercept, the module
can return the square of the correlation coefficient (R squared), the
Durbin-Watson statistic, the mean squared error, sigma, the t statistics,
the variance of the estimates of the slope and y-intercept,
the predicted y values and the residuals of the y values. (See the METHODS
section for a description of these statistics.)

The module accepts input data in separate x and y arrays or a single
2-D array (an array of arrayrefs). The optional weights are input in a
separate array. The module can optionally verify that the input data and
weights are valid numbers. If weights are input, the line fit minimizes
the weighted sum of the squared errors and the following statistics are
weighted: the correlation coefficient, the Durbin-Watson statistic, the
mean squared error, sigma and the t statistics.

The module is state-oriented and caches its results. Once you call the
setData() method, you can call the other methods in any order or call a
method several times without invoking redundant calculations. After calling
setData(), you can modify the input data or weights without affecting the
module's results.

The decision whether to use weighting can be based on a priori knowledge
of the data or on supplemental data. If the data is sparse or contains
non-random noise, weighting can degrade the solution. Weighting is a good
option if some points are suspect or less relevant (e.g., older terms in a
time series, points that are known to have more noise).

=head1 ALGORITHM

The least-squares line is the line that minimizes the sum of the squares
of the y residuals:

 Minimize SUM((y[i] - (a + b * x[i])) ** 2)

Setting the partial derivatives with respect to a and b to zero yields a
solution that can be expressed in terms of the means, variances and
covariances of x and y:

 b = SUM((x[i] - meanX) * (y[i] - meanY)) / SUM((x[i] - meanX) ** 2)

 a = meanY - b * meanX

Note that a and b are undefined if all the x values are the same.

If you use weights, each term in the above sums is multiplied by the
value of the weight for that index. The program normalizes the weights
(after copying the input values) so that the sum of the weights equals
the number of points. This minimizes the differences between the weighted
and unweighted equations.

Statistics::LineFit uses equations that are mathematically equivalent to
the above equations and computationally more efficient. The module runs
in O(N) (linear time).

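For illustration, here is a minimal standalone sketch of the equivalent
single-pass, sum-based form (the same quantities that the module's regress()
method accumulates); the subroutine name is hypothetical and is not part of
the module's API:

 # Hypothetical helper, shown only to illustrate the O(N) computation
 sub fit_line {
     my ($x, $y) = @_;    # array refs of equal length
     my $n = @$x;
     my ($sumX, $sumY, $sumXX, $sumXY) = (0, 0, 0, 0);
     for (my $i = 0; $i < $n; ++$i) {
         $sumX  += $x->[$i];
         $sumY  += $y->[$i];
         $sumXX += $x->[$i] ** 2;
         $sumXY += $x->[$i] * $y->[$i];
     }
     my $sumSqDevX = $sumXX - $sumX ** 2 / $n;
     return if $sumSqDevX == 0;    # all x values equal: no unique line
     my $b = ($sumXY - $sumX * $sumY / $n) / $sumSqDevX;
     my $a = ($sumY - $b * $sumX) / $n;
     return ($a, $b);              # (intercept, slope)
 }
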
=head1 LIMITATIONS

The regression fails if the input x values are all equal or the only unequal
x values have zero weights. This is an inherent limit to fitting a line of
the form y = a + b * x. In this case, the module issues an error message
and methods that return statistical values will return undefined values.
You can also use the return value of the regress() method to check the
status of the regression.

As the sum of the squared deviations of the x values approaches zero,
the module's results become sensitive to the precision of floating point
operations on the host system.

If the x values are not all the same and the apparent "best fit" line is
vertical, the module will fit a horizontal line. For example, an input of
(1, 1), (1, 7), (2, 3), (2, 5) returns a slope of zero, an intercept of 4
and an R squared of zero. This is correct behavior because this line is the
best least-squares fit to the data for the given parameterization
(y = a + b * x).

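A quick way to see this behavior (the expected values in the comment are
the ones quoted above):

 use Statistics::LineFit;
 my @x = (1, 1, 2, 2);
 my @y = (1, 7, 3, 5);
 my $lineFit = Statistics::LineFit->new();
 $lineFit->setData(\@x, \@y) or die "Invalid data";
 my ($intercept, $slope) = $lineFit->coefficients();
 print "intercept=$intercept slope=$slope rSquared=",
     $lineFit->rSquared(), "\n";    # intercept=4 slope=0 rSquared=0
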
On a 32-bit system the results are accurate to about 11 significant digits,
depending on the input data. Many of the installation tests will fail
on a system with word lengths of 16 bits or fewer. (You might want to
upgrade your old 80286 IBM PC.)

=head1 EXAMPLES

=head2 Alternate calling sequence:

 use Statistics::LineFit;
 $lineFit = Statistics::LineFit->new();
 $lineFit->setData(\@x, \@y) or die "Invalid regression data\n";
 if (defined $lineFit->rSquared()
     and $lineFit->rSquared() > $threshold)
 {
     ($intercept, $slope) = $lineFit->coefficients();
     print "Slope: $slope Y-intercept: $intercept\n";
 }

=head2 Multiple calls with same object, validate input, suppress error messages:

 use Statistics::LineFit;
 $lineFit = Statistics::LineFit->new(1, 1);
 while (1) {
     @xy = read2Dxy();    # User-supplied subroutine
     $lineFit->setData(\@xy);
     ($intercept, $slope) = $lineFit->coefficients();
     if (defined $intercept) {
         print "Slope: $slope Y-intercept: $intercept\n";
     }
 }

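=head2 Weighted line fit (the weight values here are hypothetical, shown only for illustration):

 use Statistics::LineFit;
 $lineFit = Statistics::LineFit->new();
 # Down-weight the older points of a time series; only the relative
 # sizes of the weights matter (they are normalized internally).
 @weights = (0.5, 0.5, 1, 1, 1, 1);
 $lineFit->setData(\@x, \@y, \@weights) or die "Invalid regression data";
 ($intercept, $slope) = $lineFit->coefficients();
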
=head1 METHODS

The module is state-oriented and caches its results. Once you call the
setData() method, you can call the other methods in any order or call
a method several times without invoking redundant calculations.

The regression fails if the x values are all the same. In this case,
the module issues an error message and methods that return statistical
values will return undefined values. You can also use the return value
of the regress() method to check the status of the regression.

=head2 new() - create a new Statistics::LineFit object

 $lineFit = Statistics::LineFit->new();
 $lineFit = Statistics::LineFit->new($validate);
 $lineFit = Statistics::LineFit->new($validate, $hush);

 $validate = 1 -> Verify input data is numeric (slower execution)
             0 -> Don't verify input data (default, faster execution)
 $hush = 1 -> Suppress error messages
       = 0 -> Enable error messages (default)

=head2 coefficients() - Return the slope and y intercept

 ($intercept, $slope) = $lineFit->coefficients();

The returned list is undefined if the regression fails.

=head2 durbinWatson() - Return the Durbin-Watson statistic

 $durbinWatson = $lineFit->durbinWatson();

The Durbin-Watson test is a test for first-order autocorrelation in
the residuals of a time series regression. The Durbin-Watson statistic
has a range of 0 to 4; a value of 2 indicates there is no
autocorrelation.

The return value is undefined if the regression fails. If weights are
input, the return value is the weighted Durbin-Watson statistic.

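For example, a sketch of a simple screening test (the acceptance band of
2 +/- 0.5 is an illustrative rule of thumb, not something the module
defines; consult a Durbin-Watson table for real cutoffs):

 my $durbinWatson = $lineFit->durbinWatson();
 if (defined $durbinWatson and abs($durbinWatson - 2) > 0.5) {
     warn "Residuals may be autocorrelated (DW = $durbinWatson)\n";
 }
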
=head2 meanSqError() - Return the mean squared error

 $meanSquaredError = $lineFit->meanSqError();

The return value is undefined if the regression fails. If weights are
input, the return value is the weighted mean squared error.

=head2 predictedYs() - Return the predicted y values

 @predictedYs = $lineFit->predictedYs();

The returned list is undefined if the regression fails.

=head2 regress() - Do the least squares line fit (if not already done)

 $lineFit->regress() or die "Regression failed";

You don't need to call this method because it is invoked by the other
methods as needed. After you call setData(), you can call regress()
at any time to get the status of the regression for the current data.

=head2 residuals() - Return input y values minus predicted y values

 @residuals = $lineFit->residuals();

The returned list is undefined if the regression fails.

=head2 rSquared() - Return the square of the correlation coefficient

 $rSquared = $lineFit->rSquared();

R squared, also called the square of the Pearson product-moment correlation
coefficient, is a measure of goodness-of-fit. It is the fraction of the
variation in Y that can be attributed to the variation in X. A perfect fit
will have an R squared of 1; fitting a line to the vertices of a
regular polygon will yield an R squared of zero. Graphical displays of data
with an R squared of less than about 0.1 do not show a visible linear trend.

The return value is undefined if the regression fails. If weights are
input, the return value is the weighted correlation coefficient.

=head2 setData() - Initialize (x,y) values and optional weights

 $lineFit->setData(\@x, \@y) or die "Invalid regression data";
 $lineFit->setData(\@x, \@y, \@weights) or die "Invalid regression data";
 $lineFit->setData(\@xy) or die "Invalid regression data";
 $lineFit->setData(\@xy, \@weights) or die "Invalid regression data";

@xy is an array of arrayrefs; x values are $xy[$i][0], y values are
$xy[$i][1]. (The module does not access any indices greater than $xy[$i][1],
so the arrayrefs can point to arrays that are longer than two elements.)
The method distinguishes the separate-array and 2-D array calling signatures
by checking whether the first element of the first argument is a reference.
A short example of the 2-D form is shown below.

The optional weights array must be the same length as the data array(s).
The weights must be non-negative numbers; at least two of the weights
must be nonzero. Only the relative size of the weights is significant:
the program normalizes the weights (after copying the input values) so
that the sum of the weights equals the number of points. If you want to
do multiple line fits using the same weights, the weights must be passed
to each call to setData().

The method will return zero if the array lengths don't match, there are
fewer than two data points, any weights are negative or fewer than two of
the weights are nonzero. If the new() method was called with validate = 1,
the method will also verify that the data and weights are valid numbers.
Once you successfully call setData(), the next call to any method other than
new() or setData() invokes the regression. You can modify the input data
or weights after calling setData() without affecting the module's results.

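A minimal sketch of the 2-D calling form (the data values are made up for
illustration):

 use Statistics::LineFit;
 my @xy = ( [1, 1.2], [2, 1.9], [3, 3.1], [4, 4.0] );
 my $lineFit = Statistics::LineFit->new();
 $lineFit->setData(\@xy) or die "Invalid regression data";
 my ($intercept, $slope) = $lineFit->coefficients();
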
=head2 sigma() - Return the standard error of the estimate

 $sigma = $lineFit->sigma();

Sigma is an estimate of the homoscedastic standard deviation of the
error. Sigma is also known as the standard error of the estimate.

The return value is undefined if the regression fails. If weights are
input, the return value is the weighted standard error.

=head2 tStatistics() - Return the t statistics

 ($tStatIntercept, $tStatSlope) = $lineFit->tStatistics();

The t statistic, also called the t ratio or Wald statistic, is used to
accept or reject a hypothesis using a table of cutoff values computed from
the t distribution. The t statistic suggests that the estimated value is
(reasonable, too small, too large) when the t statistic is (close to zero,
large and positive, large and negative).

The returned list is undefined if the regression fails. If weights
are input, the returned values are the weighted t statistics.

=head2 varianceOfEstimates() - Return variances of estimates of intercept, slope

 ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();

Assuming the data are noisy or inaccurate, the intercept and slope returned
by the coefficients() method are only estimates of the true intercept and
slope. The varianceOfEstimates() method returns the variances of the
estimates of the intercept and slope, respectively. See Numerical Recipes
in C, section 15.2 (Fitting Data to a Straight Line), equation 15.2.9.

The returned list is undefined if the regression fails. If weights
are input, the returned values are the weighted variances.

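For example, a sketch that converts the variances to standard errors (the
square roots of the variances; the variable names are illustrative):

 my ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();
 if (defined $varianceIntercept) {
     my $seIntercept = sqrt($varianceIntercept);
     my $seSlope = sqrt($varianceSlope);
     print "Intercept std. error: $seIntercept ",
         "Slope std. error: $seSlope\n";
 }
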
=head1 SEE ALSO

 Mendenhall, W., and Sincich, T.L., 2003, A Second Course in Statistics:
   Regression Analysis, 6th ed., Prentice Hall.
 Press, W. H., Flannery, B. P., Teukolsky, S. A., Vetterling, W. T., 1992,
   Numerical Recipes in C : The Art of Scientific Computing, 2nd ed.,
   Cambridge University Press.
 The man page for perl(1).
 The CPAN modules Statistics::OLS, Statistics::GaussHelmert and
   Statistics::Regression.

Statistics::LineFit is simpler to use than Statistics::GaussHelmert or
Statistics::Regression. Statistics::LineFit was inspired by and borrows some
ideas from the venerable Statistics::OLS module.

The significant differences between Statistics::LineFit and Statistics::OLS
(version 0.07) are:

=over 4

=item B<Statistics::LineFit is more robust.>

Statistics::OLS returns incorrect results for certain input datasets.
Statistics::OLS does not deep copy its input arrays, which can lead
to subtle bugs. The Statistics::OLS installation test has only one
test and does not verify that the regression returns correct results.
In contrast, Statistics::LineFit has over 200 installation tests that use
various datasets/calling sequences to verify the accuracy of the
regression to within 1.0e-10.

=item B<Statistics::LineFit is faster.>

For a sequence of calls to new(), setData(\@x, \@y) and regress(),
Statistics::LineFit is faster than Statistics::OLS by factors of 2.0, 1.6
and 2.4 for array lengths of 5, 100 and 10000, respectively.

=item B<Statistics::LineFit can do weighted or unweighted regression.>

Statistics::OLS lacks this option.

=item B<Statistics::LineFit has a better interface.>

Once you call the Statistics::LineFit::setData() method, you can call the
other methods in any order and call methods multiple times without invoking
redundant calculations. Statistics::LineFit lets you enable or disable
data verification or error messages.

=item B<Statistics::LineFit has better code and documentation.>

The code in Statistics::LineFit is more readable, more object oriented and
more compliant with Perl coding standards than the code in Statistics::OLS.
The documentation for Statistics::LineFit is more detailed and complete.

=back

=head1 AUTHOR

Richard Anderson, cpan(AT)richardanderson(DOT)org,
http://www.richardanderson.org

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

The full text of the license can be found in the LICENSE file included in
the distribution and available in the CPAN listing for Statistics::LineFit
(see www.cpan.org or search.cpan.org).

=head1 DISCLAIMER

To the maximum extent permitted by applicable law, the author of this
module disclaims all warranties, either express or implied, including
but not limited to implied warranties of merchantability and fitness for
a particular purpose, with regard to the software and the accompanying
documentation.

=cut