diff PGAP-1.2.1/Statistics/LineFit.pm @ 0:83e62a1aeeeb draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 13:51:52 +0000
parents
children
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PGAP-1.2.1/Statistics/LineFit.pm	Thu Jun 24 13:51:52 2021 +0000
@@ -0,0 +1,745 @@
+package Statistics::LineFit;
+use strict;
+use Carp qw(carp);
+BEGIN {
+        use Exporter ();
+        use vars qw ($AUTHOR $VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+        $AUTHOR      = 'Richard Anderson <cpan(AT)richardanderson(DOT)org>';
+        @EXPORT      = @EXPORT_OK = qw();
+        %EXPORT_TAGS = ();
+        @ISA         = qw(Exporter);
+        $VERSION     = 0.06;
+}
+
+sub new {
+#
+# Purpose: Create a new Statistics::LineFit object
+#
+    my ($caller, $validate, $hush) = @_;
+    my $self = { doneRegress  => 0,
+                 gotData      => 0,
+                 hush         => defined $hush ? $hush : 0,
+                 validate     => defined $validate ? $validate : 0,
+               };
+    bless $self, ref($caller) || $caller;
+    return $self;
+}
+
+sub coefficients {
+#
+# Purpose: Return the slope and intercept from least squares line fit
+# 
+    my $self = shift;
+    unless (defined $self->{intercept} and defined $self->{slope}) {
+        $self->regress() or return;
+    }
+    return ($self->{intercept}, $self->{slope});
+}
+
+sub computeSums {
+#
+# Purpose: Compute sum of x, y, x**2, y**2 and x*y (private method)
+#
+    my $self = shift;
+    my ($sumX, $sumY, $sumXX, $sumYY, $sumXY) = (0, 0, 0, 0, 0);
+    if (defined $self->{weight}) {
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            $sumX += $self->{weight}[$i] * $self->{x}[$i];
+            $sumY += $self->{weight}[$i] * $self->{y}[$i];
+            $sumXX += $self->{weight}[$i] * $self->{x}[$i] ** 2;
+            $sumYY += $self->{weight}[$i] * $self->{y}[$i] ** 2;
+            $sumXY += $self->{weight}[$i] * $self->{x}[$i] 
+                * $self->{y}[$i];
+        }
+    } else {
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            $sumX += $self->{x}[$i];
+            $sumY += $self->{y}[$i];
+            $sumXX += $self->{x}[$i] ** 2;
+            $sumYY += $self->{y}[$i] ** 2;
+            $sumXY += $self->{x}[$i] * $self->{y}[$i];
+        }
+    }
+    return ($sumX, $sumY, $sumXX, $sumYY, $sumXY);
+}
+
+sub durbinWatson {
+#
+# Purpose: Return the Durbin-Watson statistic
+# 
+    my $self = shift;
+    unless (defined $self->{durbinWatson}) {
+        $self->regress() or return;
+        my $sumErrDiff = 0;
+        my $errorTMinus1 = $self->{y}[0] - ($self->{intercept} + $self->{slope}
+            * $self->{x}[0]);
+        for (my $i = 1; $i < $self->{numXY}; ++$i) {
+            my $error = $self->{y}[$i] - ($self->{intercept} + $self->{slope}
+                * $self->{x}[$i]);
+            $sumErrDiff += ($error - $errorTMinus1) ** 2;
+            $errorTMinus1 = $error;
+        }
+        $self->{durbinWatson} = $self->sumSqErrors() > 0 ?
+            $sumErrDiff / $self->sumSqErrors() : 0;
+    }
+    return $self->{durbinWatson};
+}
+
+sub meanSqError {
+#
+# Purpose: Return the mean squared error
+# 
+    my $self = shift;
+    unless (defined $self->{meanSqError}) {
+        $self->regress() or return;
+        $self->{meanSqError} = $self->sumSqErrors() / $self->{numXY};
+    }
+    return $self->{meanSqError};
+}
+
+sub predictedYs {
+#
+# Purpose: Return the predicted y values
+# 
+    my $self = shift;
+    unless (defined $self->{predictedYs}) {
+        $self->regress() or return;
+        $self->{predictedYs} = [];
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            $self->{predictedYs}[$i] = $self->{intercept} 
+                + $self->{slope} * $self->{x}[$i];
+        }
+    }
+    return @{$self->{predictedYs}};
+}
+
+sub regress {
+#
+# Purpose: Do weighted or unweighted least squares 2-D line fit (if needed)
+#
+# Description:
+# The equations below apply to both the weighted and unweighted fit: the
+# weights are normalized in setWeights(), so the sum of the weights is
+# equal to numXY.
+# 
+    my $self = shift;
+    return $self->{regressOK} if $self->{doneRegress};
+    unless ($self->{gotData}) {
+        carp "No valid data input - can't do regression" unless $self->{hush};
+        return 0;
+    } 
+    my ($sumX, $sumY, $sumYY, $sumXY);
+    ($sumX, $sumY, $self->{sumXX}, $sumYY, $sumXY) = $self->computeSums();
+    $self->{sumSqDevX} = $self->{sumXX} - $sumX ** 2 / $self->{numXY};
+    if ($self->{sumSqDevX} != 0) {
+        $self->{sumSqDevY} = $sumYY - $sumY ** 2 / $self->{numXY};
+        $self->{sumSqDevXY} = $sumXY - $sumX * $sumY / $self->{numXY};
+        $self->{slope} = $self->{sumSqDevXY} / $self->{sumSqDevX};
+        $self->{intercept} = ($sumY - $self->{slope} * $sumX) / $self->{numXY};
+        $self->{regressOK} = 1;
+    } else {
+        carp "Can't fit line when x values are all equal" unless $self->{hush};
+        $self->{sumXX} = $self->{sumSqDevX} = undef;
+        $self->{regressOK} = 0;
+    }
+    $self->{doneRegress} = 1;
+    return $self->{regressOK};
+}
+
+sub residuals {
+#
+# Purpose: Return the predicted Y values minus the observed Y values
+# 
+    my $self = shift;
+    unless (defined $self->{residuals}) {
+        $self->regress() or return;
+        $self->{residuals} = [];
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            $self->{residuals}[$i] = $self->{y}[$i] - ($self->{intercept} 
+                + $self->{slope} * $self->{x}[$i]);
+        }
+    }
+    return @{$self->{residuals}};
+}
+
+sub rSquared {
+#
+# Purpose: Return the correlation coefficient
+# 
+    my $self = shift;
+    unless (defined $self->{rSquared}) {
+        $self->regress() or return;
+        my $denom = $self->{sumSqDevX} * $self->{sumSqDevY};
+        $self->{rSquared} = $denom != 0 ? $self->{sumSqDevXY} ** 2 / $denom : 1;
+    }
+    return $self->{rSquared};
+}
+
+sub setData {
+#
+# Purpose: Initialize (x,y) values and optional weights
+# 
+    my ($self, $x, $y, $weights) = @_;
+    $self->{doneRegress} = 0;
+    $self->{x} = $self->{y} = $self->{numXY} = $self->{weight} 
+        = $self->{intercept} = $self->{slope} = $self->{rSquared} 
+        = $self->{sigma} = $self->{durbinWatson} = $self->{meanSqError} 
+        = $self->{sumSqErrors} = $self->{tStatInt} = $self->{tStatSlope} 
+        = $self->{predictedYs} = $self->{residuals} = $self->{sumXX} 
+        = $self->{sumSqDevX} = $self->{sumSqDevY} = $self->{sumSqDevXY} 
+        = undef;
+    if (@$x < 2) { 
+        carp "Must input more than one data point!" unless $self->{hush};
+        return 0;
+    }
+    $self->{numXY} = @$x;
+    if (ref $x->[0]) {
+        $self->setWeights($y) or return 0;
+        $self->{x} = [ ];
+        $self->{y} = [ ];
+        foreach my $xy (@$x) { 
+            push @{$self->{x}}, $xy->[0]; 
+            push @{$self->{y}}, $xy->[1]; 
+        }
+    } else {
+        if (@$x != @$y) { 
+            carp "Length of x and y arrays must be equal!" unless $self->{hush};
+            return 0;
+        }
+        $self->setWeights($weights) or return 0;
+        $self->{x} = [ @$x ];
+        $self->{y} = [ @$y ];
+    }
+    if ($self->{validate}) { 
+        unless ($self->validData()) { 
+            $self->{x} = $self->{y} = $self->{weight} = $self->{numXY} = undef;
+            return 0;
+        }
+    }
+    $self->{gotData} = 1;
+    return 1;
+}
+
+sub setWeights {
+#
+# Purpose: Normalize and initialize line fit weighting factors (private method)
+# 
+    my ($self, $weights) = @_;
+    return 1 unless defined $weights;
+    if (@$weights != $self->{numXY}) {
+        carp "Length of weight array must equal length of data array!"
+            unless $self->{hush};
+        return 0;
+    }
+    if ($self->{validate}) { $self->validWeights($weights) or return 0 } 
+    my $sumW = my $numNonZero = 0;
+    foreach my $weight (@$weights) {
+        if ($weight < 0) {
+            carp "Weights must be non-negative numbers!" unless $self->{hush};
+            return 0;
+        }
+        $sumW += $weight;
+        if ($weight != 0) { ++$numNonZero }
+    }
+    if ($numNonZero < 2) {
+        carp "At least two weights must be nonzero!" unless $self->{hush};
+        return 0;
+    }
+    my $factor = @$weights / $sumW;
+    # Normalize into a copy so the caller's weights array is not modified,
+    # as documented ("after copying the input values").
+    $self->{weight} = [ map { $_ * $factor } @$weights ];
+    return 1;
+}
+
+sub sigma {
+#
+# Purpose: Return the estimated homoscedastic standard deviation of the
+#          error term
+# 
+    my $self = shift;
+    unless (defined $self->{sigma}) {
+        $self->regress() or return;
+        $self->{sigma} = $self->{numXY} > 2 ? 
+            sqrt($self->sumSqErrors() / ($self->{numXY} - 2)) : 0;
+    }
+    return $self->{sigma};
+}
+
+sub sumSqErrors {
+#
+# Purpose: Return the sum of the squared errors (private method)
+# 
+    my $self = shift;
+    unless (defined $self->{sumSqErrors}) {
+        $self->regress() or return;
+        $self->{sumSqErrors} = $self->{sumSqDevY} - $self->{sumSqDevX}
+            * $self->{slope} ** 2;
+        if ($self->{sumSqErrors} < 0) { $self->{sumSqErrors} = 0 } 
+    }
+    return $self->{sumSqErrors};
+}
+
+sub tStatistics {
+#
+# Purpose: Return the T statistics
+# 
+    my $self = shift;
+    unless (defined $self->{tStatInt} and defined $self->{tStatSlope}) {
+        $self->regress() or return;
+        my $biasEstimateInt = $self->sigma() * sqrt($self->{sumXX} 
+            / ($self->{sumSqDevX} * $self->{numXY}));
+        $self->{tStatInt} = $biasEstimateInt != 0 ?
+            $self->{intercept} / $biasEstimateInt : 0;
+        my $biasEstimateSlope = $self->sigma() / sqrt($self->{sumSqDevX});
+        $self->{tStatSlope} = $biasEstimateSlope != 0 ? 
+            $self->{slope} / $biasEstimateSlope : 0;
+    }
+    return ($self->{tStatInt}, $self->{tStatSlope});
+}
+
+sub validData {
+#
+# Purpose: Verify that the input x-y data are numeric (private method)
+# 
+    my $self = shift;
+    for (my $i = 0; $i < $self->{numXY}; ++$i) {
+        if (not defined $self->{x}[$i]) {
+            carp "Input x[$i] is not defined" unless $self->{hush};
+            return 0;
+        }
+        if ($self->{x}[$i] !~
+            /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
+        {
+            carp "Input x[$i] is not a number: $self->{x}[$i]" 
+                unless $self->{hush};
+            return 0;
+        }
+        if (not defined $self->{y}[$i]) {
+            carp "Input y[$i] is not defined" unless $self->{hush};
+            return 0;
+        }
+        if ($self->{y}[$i] !~
+            /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
+        {
+            carp "Input y[$i] is not a number: $self->{y}[$i]"
+                unless $self->{hush};
+            return 0;
+        }
+    }
+    return 1;
+}
+
+sub validWeights {
+#
+# Purpose: Verify that the input weights are numeric (private method)
+# 
+    my ($self, $weights) = @_;
+    for (my $i = 0; $i < @$weights; ++$i) {
+        if (not defined $weights->[$i]) {
+            carp "Input weights[$i] is not defined" unless $self->{hush};
+            return 0;
+        }
+        if ($weights->[$i]
+            !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
+        {
+            carp "Input weights[$i] is not a number: $weights->[$i]"
+                unless $self->{hush};
+            return 0;
+        }
+    }
+    return 1;
+}
+
+sub varianceOfEstimates {
+#
+# Purpose: Return the variances in the estimates of the intercept and slope
+# 
+    my $self = shift;
+    unless (defined $self->{intercept} and defined $self->{slope}) {
+        $self->regress() or return;
+    }
+    my @predictedYs = $self->predictedYs();
+    my ($s, $sx, $sxx) = (0, 0, 0);
+    if (defined $self->{weight}) {
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2; 
+            next if 0 == $variance;
+            $s += 1.0 / $variance;
+            $sx += $self->{weight}[$i] * $self->{x}[$i] / $variance;
+            $sxx += $self->{weight}[$i] * $self->{x}[$i] ** 2 / $variance;
+        }
+    } else {
+        for (my $i = 0; $i < $self->{numXY}; ++$i) {
+            my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2; 
+            next if 0 == $variance;
+            $s += 1.0 / $variance;
+            $sx += $self->{x}[$i] / $variance;
+            $sxx += $self->{x}[$i] ** 2 / $variance;
+        }
+    }
+    my $denominator = ($s * $sxx - $sx ** 2);
+    if (0 == $denominator) {
+        return;
+    } else {
+        return ($sxx / $denominator, $s / $denominator);
+    }
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Statistics::LineFit - Least squares line fit, weighted or unweighted
+
+=head1 SYNOPSIS
+
+ use Statistics::LineFit;
+ $lineFit = Statistics::LineFit->new();
+ $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
+ ($intercept, $slope) = $lineFit->coefficients();
+ defined $intercept or die "Can't fit line if x values are all equal";
+ $rSquared = $lineFit->rSquared();
+ $meanSquaredError = $lineFit->meanSqError();
+ $durbinWatson = $lineFit->durbinWatson();
+ $sigma = $lineFit->sigma();
+ ($tStatIntercept, $tStatSlope) = $lineFit->tStatistics();
+ @predictedYs = $lineFit->predictedYs();
+ @residuals = $lineFit->residuals();
+ ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();
+
+=head1 DESCRIPTION
+
+The Statistics::LineFit module does weighted or unweighted least-squares
+line fitting to two-dimensional data (y = a + b * x).  (This is also called
+linear regression.)  In addition to the slope and y-intercept, the module
+can return the square of the correlation coefficient (R squared), the
+Durbin-Watson statistic, the mean squared error, sigma, the t statistics,
+the variance of the estimates of the slope and y-intercept, 
+the predicted y values and the residuals of the y values.  (See the METHODS
+section for a description of these statistics.)
+
+The module accepts input data in separate x and y arrays or a single
+2-D array (an array of arrayrefs).  The optional weights are input in a
+separate array.  The module can optionally verify that the input data and
+weights are valid numbers.  If weights are input, the line fit minimizes
+the weighted sum of the squared errors and the following statistics are
+weighted: the correlation coefficient, the Durbin-Watson statistic, the
+mean squared error, sigma and the t statistics.
+
+The module is state-oriented and caches its results.  Once you call the
+setData() method, you can call the other methods in any order or call a
+method several times without invoking redundant calculations.  After calling
+setData(), you can modify the input data or weights without affecting the
+module's results.
+
+The decision whether to use weighting should be based on your a priori
+knowledge of the data or on supplemental data.  If the data is
+sparse or contains non-random noise, weighting can degrade the solution.
+Weighting is a good option if some points are suspect or less relevant (e.g.,
+older terms in a time series, points that are known to have more noise).
+
+=head1 ALGORITHM
+
+The least-squares line is the line that minimizes the sum of the squares
+of the y residuals:
+
+ Minimize SUM((y[i] - (a + b * x[i])) ** 2)
+
+Setting the partial derivatives with respect to a and b to zero yields a
+solution that can be expressed in terms of the means, variances and
+covariances of x and y:
+
+ b = SUM((x[i] - meanX) * (y[i] - meanY)) / SUM((x[i] - meanX) ** 2) 
+
+ a = meanY - b * meanX
+
+Note that a and b are undefined if all the x values are the same.
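+
+For illustration, here is a minimal unweighted sketch of these equations in
+Perl (illustrative only; it assumes the data are in @x and @y, and the
+module itself uses an equivalent but more efficient formulation):
+
+ my ($sumX, $sumY) = (0, 0);
+ $sumX += $_ for @x;
+ $sumY += $_ for @y;
+ my ($meanX, $meanY) = ($sumX / @x, $sumY / @y);
+ my ($sumSqDevXY, $sumSqDevX) = (0, 0);
+ for my $i (0 .. $#x) {
+     $sumSqDevXY += ($x[$i] - $meanX) * ($y[$i] - $meanY);
+     $sumSqDevX  += ($x[$i] - $meanX) ** 2;
+ }
+ die "x values are all equal\n" if $sumSqDevX == 0;
+ my $b = $sumSqDevXY / $sumSqDevX;   # slope
+ my $a = $meanY - $b * $meanX;       # intercept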
+
+If you use weights, each term in the above sums is multiplied by the
+value of the weight for that index.  The program normalizes the weights
+(after copying the input values) so that the sum of the weights equals
+the number of points.  This minimizes the differences between the weighted
+and unweighted equations.
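+
+A minimal sketch of that normalization (illustrative only; it assumes the
+input weights are in @weights):
+
+ my @w = @weights;        # copy, so the caller's array is not modified
+ my $sumW = 0;
+ $sumW += $_ for @w;
+ my $factor = @w / $sumW;
+ $_ *= $factor for @w;    # now the weights sum to the number of points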
+
+Statistics::LineFit uses equations that are mathematically equivalent to
+the above equations and computationally more efficient.  The module runs
+in O(N) (linear time).
+
+=head1 LIMITATIONS
+
+The regression fails if the input x values are all equal or the only unequal
+x values have zero weights.  This is an inherent limit to fitting a line of
+the form y = a + b * x.  In this case, the module issues an error message
+and methods that return statistical values will return undefined values.
+You can also use the return value of the regress() method to check the
+status of the regression.
+
+As the sum of the squared deviations of the x values approaches zero,
+the module's results become sensitive to the precision of floating point
+operations on the host system.
+
+If the x values are not all the same and the apparent "best fit" line is
+vertical, the module will fit a horizontal line.  For example, an input of
+(1, 1), (1, 7), (2, 3), (2, 5) returns a slope of zero, an intercept of 4
+and an R squared of zero.  This is correct behavior because this line is the
+best least-squares fit to the data for the given parameterization 
+(y = a + b * x).
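+
+This behavior can be checked directly (a minimal sketch; the numbers come
+from the example above):
+
+ use Statistics::LineFit;
+ my $lineFit = Statistics::LineFit->new();
+ $lineFit->setData([1, 1, 2, 2], [1, 7, 3, 5]) or die "Invalid data";
+ my ($intercept, $slope) = $lineFit->coefficients();  # 4, 0
+ my $rSquared = $lineFit->rSquared();                 # 0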
+
+On a 32-bit system the results are accurate to about 11 significant digits,
+depending on the input data.  Many of the installation tests will fail
+on a system with word lengths of 16 bits or fewer.  (You might want to
+upgrade your old 80286 IBM PC.)
+
+=head1 EXAMPLES
+
+=head2 Alternate calling sequence:
+
+ use Statistics::LineFit;
+ $lineFit = Statistics::LineFit->new();
+ $lineFit->setData(\@x, \@y) or die "Invalid regression data\n";
+ if (defined $lineFit->rSquared()
+     and $lineFit->rSquared() > $threshold) 
+ {
+     ($intercept, $slope) = $lineFit->coefficients();
+     print "Slope: $slope  Y-intercept: $intercept\n";
+ }
+
+=head2 Multiple calls with same object, validate input, suppress error messages:
+
+ use Statistics::LineFit;
+ $lineFit = Statistics::LineFit->new(1, 1);
+ while (1) {
+     @xy = read2Dxy();  # User-supplied subroutine
+     $lineFit->setData(\@xy);
+     ($intercept, $slope) = $lineFit->coefficients();
+     if (defined $intercept) {
+         print "Slope: $slope  Y-intercept: $intercept\n";
+     } 
+ }
+
+=head1 METHODS
+
+The module is state-oriented and caches its results.  Once you call the
+setData() method, you can call the other methods in any order or call
+a method several times without invoking redundant calculations.
+
+The regression fails if the x values are all the same.  In this case,
+the module issues an error message and methods that return statistical
+values will return undefined values.  You can also use the return value 
+of the regress() method to check the status of the regression.
+
+=head2 new() - create a new Statistics::LineFit object
+
+ $lineFit = Statistics::LineFit->new();
+ $lineFit = Statistics::LineFit->new($validate);
+ $lineFit = Statistics::LineFit->new($validate, $hush);
+
+ $validate = 1 -> Verify input data is numeric (slower execution)
+             0 -> Don't verify input data (default, faster execution)
+ $hush = 1 -> Suppress error messages
+       = 0 -> Enable error messages (default)
+
+=head2 coefficients() - Return the slope and y intercept
+
+ ($intercept, $slope) = $lineFit->coefficients();
+
+The returned list is undefined if the regression fails.
+
+=head2 durbinWatson() - Return the Durbin-Watson statistic
+
+ $durbinWatson = $lineFit->durbinWatson();
+
+The Durbin-Watson test is a test for first-order autocorrelation in
+the residuals of a time series regression. The Durbin-Watson statistic
+has a range of 0 to 4; a value of 2 indicates there is no
+autocorrelation.
+
+The return value is undefined if the regression fails.  If weights are
+input, the return value is the weighted Durbin-Watson statistic.
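+
+In terms of the residuals e[i] = y[i] - (a + b * x[i]), the unweighted
+statistic is SUM((e[i] - e[i-1]) ** 2) / SUM(e[i] ** 2).  A minimal sketch
+of that definition (illustrative only; it assumes the residuals are in @e,
+and the module computes this for you):
+
+ my ($sumErrDiff, $sumSqErr) = (0, 0);
+ for my $i (0 .. $#e) {
+     $sumErrDiff += ($e[$i] - $e[$i - 1]) ** 2 if $i > 0;
+     $sumSqErr   += $e[$i] ** 2;
+ }
+ my $durbinWatson = $sumSqErr > 0 ? $sumErrDiff / $sumSqErr : 0;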
+
+=head2 meanSqError() - Return the mean squared error
+
+ $meanSquaredError = $lineFit->meanSqError();
+
+The return value is undefined if the regression fails.  If weights are
+input, the return value is the weighted mean squared error. 
+
+=head2 predictedYs() - Return the predicted y values
+
+ @predictedYs = $lineFit->predictedYs();
+
+The returned list is undefined if the regression fails.
+
+=head2 regress() - Do the least squares line fit (if not already done)
+
+ $lineFit->regress() or die "Regression failed"
+
+You don't need to call this method because it is invoked by the other
+methods as needed.  After you call setData(), you can call regress()
+at any time to get the status of the regression for the current data.
+
+=head2 residuals() - Return predicted y values minus input y values
+
+ @residuals = $lineFit->residuals();
+
+The returned list is undefined if the regression fails.
+
+=head2 rSquared() - Return the square of the correlation coefficient
+
+ $rSquared = $lineFit->rSquared();
+
+R squared, also called the square of the Pearson product-moment correlation
+coefficient, is a measure of goodness-of-fit.  It is the fraction of the
+variation in Y that can be attributed to the variation in X.  A perfect fit
+will have an R squared of 1; fitting a line to the vertices of a
+regular polygon will yield an R squared of zero.  Graphical displays of data
+with an R squared of less than about 0.1 do not show a visible linear trend.
+
+The return value is undefined if the regression fails.  If weights are 
+input, the return value is the weighted correlation coefficient.
+
+=head2 setData() - Initialize (x,y) values and optional weights
+
+ $lineFit->setData(\@x, \@y) or die "Invalid regression data";
+ $lineFit->setData(\@x, \@y, \@weights) or die "Invalid regression data";
+ $lineFit->setData(\@xy) or die "Invalid regression data";
+ $lineFit->setData(\@xy, \@weights) or die "Invalid regression data";
+
+@xy is an array of arrayrefs; x values are $xy[$i][0], y values are
+$xy[$i][1].  (The module does not access any indices greater than $xy[$i][1],
+so the arrayrefs can point to arrays that are longer than two elements.)
+The method distinguishes the separate x and y array signatures from the
+2-D array signatures by checking whether the first element of the first
+argument is an array reference.
+
+The optional weights array must be the same length as the data array(s).
+The weights must be non-negative numbers; at least two of the weights
+must be nonzero.  Only the relative size of the weights is significant:
+the program normalizes the weights (after copying the input values) so
+that the sum of the weights equals the number of points.  If you want to
+do multiple line fits using the same weights, the weights must be passed
+to each call to setData().
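+
+For example, the 2-D array form with weights (a minimal sketch; the data
+values are illustrative):
+
+ my @xy = ([1, 2.1], [2, 3.9], [3, 6.2], [4, 8.1]);
+ my @weights = (1, 1, 2, 2);   # only relative sizes matter
+ $lineFit->setData(\@xy, \@weights) or die "Invalid regression data";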
+
+The method will return zero if the array lengths don't match, there are
+fewer than two data points, any weights are negative, or fewer than two of
+the weights are nonzero.  If the new() method was called with validate = 1,
+the method will also verify that the data and weights are valid numbers.
+Once you successfully call setData(), the next call to any method other than
+new() or setData() invokes the regression.  You can modify the input data
+or weights after calling setData() without affecting the module's results.
+
+=head2 sigma() - Return the standard error of the estimate
+
+ $sigma = $lineFit->sigma();
+
+Sigma is an estimate of the homoscedastic standard deviation of the
+error.  Sigma is also known as the standard error of the estimate.
+
+The return value is undefined if the regression fails.  If weights are
+input, the return value is the weighted standard error.
+
+=head2 tStatistics() - Return the t statistics
+
+ ($tStatIntercept, $tStatSlope) = $lineFit->tStatistics();
+
+The t statistic, also called the t ratio or Wald statistic, is used to
+accept or reject a hypothesis using a table of cutoff values computed from
+the t distribution.  The t-statistic suggests that the estimated value is
+(reasonable, too small, too large) when the t-statistic is (close to zero,
+large and positive, large and negative).
+
+The returned list is undefined if the regression fails.  If weights 
+are input, the returned values are the weighted t statistics.
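+
+For example, a common pattern (a hedged sketch; the cutoff depends on the
+sample size and significance level you choose, and 2.0 is only a rough
+two-sided 5% value for large samples):
+
+ my ($tStatIntercept, $tStatSlope) = $lineFit->tStatistics();
+ if (defined $tStatSlope and abs($tStatSlope) > 2.0) {
+     print "slope differs significantly from zero\n";
+ }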
+
+=head2 varianceOfEstimates() - Return variances of estimates of intercept, slope
+
+ ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();
+
+Assuming the data are noisy or inaccurate, the intercept and slope returned
+by the coefficients() method are only estimates of the true intercept and 
+slope.  The varianceOfEstimates() method returns the variances of the 
+estimates of the intercept and slope, respectively.  See Numerical Recipes
+in C, section 15.2 (Fitting Data to a Straight Line), equation 15.2.9.
+
+The returned list is undefined if the regression fails.  If weights 
+are input, the returned values are the weighted variances.
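+
+For example, the standard errors of the estimates are the square roots of
+these variances (a minimal sketch):
+
+ my ($varianceIntercept, $varianceSlope) = $lineFit->varianceOfEstimates();
+ if (defined $varianceIntercept) {
+     my $stdErrIntercept = sqrt $varianceIntercept;
+     my $stdErrSlope     = sqrt $varianceSlope;
+ }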
+
+=head1 SEE ALSO
+
+ Mendenhall, W., and Sincich, T.L., 2003, A Second Course in Statistics:
+   Regression Analysis, 6th ed., Prentice Hall.
+ Press, W. H., Flannery, B. P., Teukolsky, S. A., Vetterling, W. T., 1992,
+   Numerical Recipes in C : The Art of Scientific Computing, 2nd ed., 
+   Cambridge University Press.
+ The man page for perl(1).
+ The CPAN modules Statistics::OLS, Statistics::GaussHelmert and 
+   Statistics::Regression.
+
+Statistics::LineFit is simpler to use than Statistics::GaussHelmert or
+Statistics::Regression.  Statistics::LineFit was inspired by and borrows some
+ideas from the venerable Statistics::OLS module.  The significant
+differences between Statistics::LineFit and Statistics::OLS (version 0.07)
+are:
+
+=over 4
+
+=item B<Statistics::LineFit is more robust.>
+
+Statistics::OLS returns incorrect results for certain input datasets. 
+Statistics::OLS does not deep copy its input arrays, which can lead
+to subtle bugs.  The Statistics::OLS installation test has only one
+test and does not verify that the regression returns correct results.
+In contrast, Statistics::LineFit has over 200 installation tests that use
+various datasets/calling sequences to verify the accuracy of the
+regression to within 1.0e-10.
+
+=item B<Statistics::LineFit is faster.>
+
+For a sequence of calls to new(), setData(\@x, \@y) and regress(),
+Statistics::LineFit is faster than Statistics::OLS by factors of 2.0, 1.6
+and 2.4 for array lengths of 5, 100 and 10000, respectively.
+
+=item B<Statistics::LineFit can do weighted or unweighted regression.>
+
+Statistics::OLS lacks this option.
+
+=item B<Statistics::LineFit has a better interface.>
+
+Once you call the Statistics::LineFit::setData() method, you can call the
+other methods in any order and call methods multiple times without invoking
+redundant calculations.  Statistics::LineFit lets you enable or disable
+data verification or error messages.
+
+=item B<Statistics::LineFit has better code and documentation.>
+
+The code in Statistics::LineFit is more readable, more object oriented and
+more compliant with Perl coding standards than the code in Statistics::OLS.
+The documentation for Statistics::LineFit is more detailed and complete.
+
+=back
+
+=head1 AUTHOR
+
+Richard Anderson, cpan(AT)richardanderson(DOT)org,
+http://www.richardanderson.org
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+The full text of the license can be found in the LICENSE file included in
+the distribution and available in the CPAN listing for Statistics::LineFit
+(see www.cpan.org or search.cpan.org).
+
+=head1 DISCLAIMER
+
+To the maximum extent permitted by applicable law, the author of this
+module disclaims all warranties, either express or implied, including
+but not limited to implied warranties of merchantability and fitness for
+a particular purpose, with regard to the software and the accompanying
+documentation.
+
+=cut
+