Last week I was looking at a really interesting math problem. I needed to write a routine that would accept an array of “observed values” and an array of “expected values” and perform a statistical goodness of fit test to determine if the observed values are (statistically) equal to the expected values or not. This can be done by computing a Chi-square statistic (or a g statistic), and then looking up the computed value against a table of critical Chi-square values. I didn’t like the idea of hard-coding a huge table of Chi-square critical values, and I didn’t like the idea of storing the critical Chi-square values in an external text file. Instead I used ACM Algorithm 299 to write a helper function which accepts a computed Chi-square value and a degrees of freedom value, and which returns the probability that the input value occurred by chance. With the helper in hand, I was then able to write a function which accepts observed and expected values, computes a Chi-square (or g) value, then calls the helper function to see if the computed Chi-square had a 0.05 probability or less or occurring by chance. If so, I reject the null hypothesis and conclude observed and expected are different, otherwise I accept the null hypothesis and conclude that observed and expected are the same (well, actually that there’s not enough evidence to conclude they’re different). Here is the helper function:

public static double ChiSquare(double x, int df)

{

// x = a computed chi-square value. df = degrees of freedom.

// output = prob. the x value occurred by chance.

// so, for example, if result < 0.05 there is only a 5% chance

// that the x value ocurred by chance and therefore

// we conclude that the actual data which produced x is

// NOT the same as the expected data.

// this function can be used to create a ChiSquareTest proedure.

// ACM Algorithm 299 and update ACM TOMS June 1985.

// Uses custom Exp() function below.

if (x <= 0.0 || df < 1) throw new Exception(“parameter x must be positive and parameter df must be 1 or greater in ChiSquared()”);

double a = 0.0; // 299 variable names

double y = 0.0;

double s = 0.0;

double z = 0.0;

double e = 0.0;

double c;

bool even; // is df even?

a = 0.5 * x;

if (df % 2 == 0) even = true; else even = false;

if (df > 1) y = Exp(-a); // ACM update remark (4)

if (even == true) s = y; else s = 2.0 * Gauss(-Math.Sqrt(x));

if (df > 2)

{

x = 0.5 * (df – 1.0);

if (even == true) z = 1.0; else z = 0.5;

if (a > 40.0) // ACM remark (5)

{

if (even == true) e = 0.0; else e = 0.5723649429247000870717135; // log(sqrt(pi))

c = Math.Log(a); // log base e

while (z <= x)

{

e = Math.Log(z) + e;

s = s + Exp(c * z – a – e); // ACM update remark (6)

z = z + 1.0;

}

return s;

} // a > 40.0

else

{

if (even == true) e = 1.0; else e = 0.5641895835477562869480795 / Math.Sqrt(a); // (1 / sqrt(pi))

c = 0.0;

while (z <= x)

{

e = e * (a / z); // ACM update remark (7)

c = c + e;

z = z + 1.0;

}

return c * y + s;

}

} // df > 2

else

{

return s;

}

} // ChiSquare()

private static double Exp(double x) // ACM update remark (3)

{

if (x < -40.0) // 40.0 is a magic number. ACM update remark (8)

return 0.0;

else

return Math.Exp(x);

}