comparison hexagram-6ae12361157c/hexagram/statistics.js @ 0:1407e3634bcf draft default tip

Uploaded r11 from test tool shed.
author adam-novak
date Tue, 22 Oct 2013 14:17:59 -0400
// statistics.js: Web Worker file to run statistical tests in the background.

// Constants:
// How many pseudocount trials should we use for the binomial test?
var BINOMIAL_PSEUDOCOUNTS = 5;

// Should we log information about suspicious p values to the console for manual
// spot checking?
var LOG_SUSPICIOUS = false;

// Go get jStat. Hope it's happy in Worker-land.
importScripts("jstat-1.0.0.js");

// Make a fake console to catch jStat warnings, so they don't crash the script.
console = {
    warn: print
};
onmessage = function(message) {
    // Handle incoming messages from the page. Each message's data is an RPC
    // request, with "name" set to a function name, "args" set to an array of
    // arguments, and "id" set to an ID that should be returned with the return
    // value in a reply message. If the function call fails, an error is sent
    // back instead.

    try {
        // Go get the specified global function, and apply it to the given
        // arguments. Use the global scope ("self") as its "this".
        var return_value = self[message.data.name].apply(self,
            message.data.args);

    } catch(exception) {

        // Send the error back to the page instead of a return value.
        // Unfortunately, errors themselves can't be cloned, so we do all the
        // message making here and send back a string.

        // First we build a string with all the parts of the error we can get.
        var error_message = "Error in web worker doing job " + message.data.id;
        error_message += "\n";
        error_message += exception.name + ": " + exception.message;
        error_message += "\n";
        error_message += "Full details:\n";
        for(var field in exception) {
            if(field == "name" || field == "message") {
                // Already got these.
                continue;
            }

            // Copy the field into the message as a string.
            error_message += field + ": " + exception[field] + "\n";
        }
        error_message += "Call: " + message.data.name + "(";
        for(var i = 0; i < message.data.args.length; i++) {
            error_message += message.data.args[i];
            if(i + 1 < message.data.args.length) {
                // Have an argument after this.
                error_message += ", ";
            }
        }
        error_message += ")";

        postMessage({
            id: message.data.id,
            error: error_message
        });

        return;
    }

    // Send the return value back with the id.
    postMessage({
        id: message.data.id,
        return_value: return_value
    });
};

function print(message) {
    // Print a message to the console of the parent page.
    postMessage({
        log: message
    });
}
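
// Illustrative page-side usage (hypothetical, kept commented out so it never
// runs inside the worker): the page creates the worker, posts an RPC request
// with "name", "args", and "id", and listens for replies carrying the same id,
// a "log" message, or an "error" string. The URL and list variables here are
// made up for the example.
//
//     var worker = new Worker("statistics.js");
//     worker.onmessage = function(reply) {
//         if(reply.data.log !== undefined) {
//             console.log(reply.data.log);
//         } else if(reply.data.error !== undefined) {
//             console.error(reply.data.error);
//         } else {
//             // reply.data.return_value holds the result for reply.data.id
//         }
//     };
//     worker.postMessage({
//         name: "statistics_for_url",
//         args: ["some_layer.tab", in_list, out_list, all_list],
//         id: 0
//     });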

function statistics_for_matrix(matrix_url, in_list, out_list, all_list) {
    // Download the given score matrix, do stats between in_list and out_list
    // for each layer in it, and return an object from layer name to p value.
    // all_list specifies the names of all signatures that figure into the
    // analysis at all.

    // Download the matrix synchronously.
    // See https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/Synchronous_and_Asynchronous_Requests
    // A side effect of this is that we won't have more simultaneous downloads
    // than workers, which is probably good.
    // This holds the request.
    var request = new XMLHttpRequest();
    // Get the layer data by GET. The false makes it synchronous.
    request.open("GET", matrix_url, false);
    request.send(null);

    // Now we have the matrix TSV, but we don't have our fancy jQuery TSV
    // parser. Parse it manually.

    // This holds an object of layer data objects (from signature to float) by
    // layer name.
    var layers = {};

    // This holds the array of lines.
    // Split on newlines (as seen in jQuery.tsv.js).
    var lines = request.responseText.split(/\r?\n/);

    // Line 0 gives all the layer names, but the first thing isn't a layer name
    // (since it's above the signature column).
    var layer_names = lines[0].split(/\t/);
    for(var i = 1; i < layer_names.length; i++) {
        // Make sure we have an object for this layer
        layers[layer_names[i]] = {};
    }

    // The rest give values per layer for the hex in column 1.
    for(var i = 1; i < lines.length; i++) {
        // This holds the parts of each line
        var parts = lines[i].split(/\t/);

        if(parts[0]) {
            // We actually have data

            // Get the signature
            var signature = parts[0];

            for(var j = 1; j < parts.length; j++) {
                // Go through each non-signature entry and set the appropriate
                // layer's value for this signature.
                layers[layer_names[j]][signature] = parseFloat(parts[j]);
            }
        }
    }

    // Now we've parsed the matrix.
    // Go do stats for each layer.
    // This holds our calculated p values by layer name.
    var p_values = {};

    print("Running statistics for (up to) " + layer_names.length +
        " layers from matrix " + matrix_url);

    for(var i = 1; i < layer_names.length; i++) {
        // Pass the layer data to the per-layer statistics, and get the p value
        // back. It's probably easier to do this in this worker than to go
        // invoke more workers.
        p_values[layer_names[i]] = statistics_for_layer(layers[layer_names[i]],
            in_list, out_list, all_list);
    }

    // We've now calculated a p value for every layer in the matrix. Return the
    // calculated p values labeled by layer.
    return p_values;

}
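
// For reference, a minimal sketch of the matrix TSV layout this parser
// assumes (made-up names and values, not from any real dataset): a header row
// whose first column is ignored, then one row per hex with the signature name
// in column 1.
//
//     (ignored)   layer_a    layer_b
//     hex1        0          1.5
//     hex2        1          2.25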

function statistics_for_layer(layer_data, in_list, out_list, all_list) {
    // Run the appropriate statistical test for the passed layer data, between
    // the given in and out arrays of signatures. all_list specifies the names
    // of all signatures that figure into the analysis at all. Return the p
    // value for the layer, or NaN if no p value could be calculated.

    // This holds whether the layer is discrete
    var is_discrete = true;

    // This holds whether the layer is binary
    var is_binary = true;

    for(var signature in layer_data) {
        if(layer_data[signature] > 1 || layer_data[signature] < 0) {
            // Not a binary layer
            is_binary = false;
        }

        if(layer_data[signature] % 1 !== 0) {
            // It's a float
            is_binary = false;
            is_discrete = false;
        }
    }

    if(is_binary) {
        // This is a binary/dichotomous layer, so run a binomial test.
        return binomial_compare(layer_data, in_list, out_list, all_list);
    } else if(is_discrete) {
        // This is a multinomial/categorical layer.
        // TODO: statistics for discrete non-binary layers
        return NaN;
    } else {
        // This is a continuous layer, so run a t test.
        return t_compare(layer_data, in_list, out_list, all_list);
    }

}
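
// Illustrative classification (made-up layers): values of only 0s and 1s are
// treated as binary and go to binomial_compare; integer values outside {0, 1}
// (say 0, 1, 2) count as discrete non-binary and currently return NaN; any
// fractional value (say 0.5) makes the layer continuous and sends it to
// t_compare.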

function statistics_for_url(layer_url, in_list, out_list, all_list) {
    // Run the stats for the layer with the given url, between the given in and
    // out arrays of signatures. all_list specifies the names of all signatures
    // that figure into the analysis at all. Return the p value for the layer,
    // or NaN if no p value could be calculated.

    print("Running statistics for individual layer " + layer_url);

    // Download the layer data synchronously.
    // See https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/Synchronous_and_Asynchronous_Requests
    // A side effect of this is that we won't have more simultaneous downloads
    // than workers, which is probably good.
    // This holds the request.
    var request = new XMLHttpRequest();
    // Get the layer data by GET. The false makes it synchronous.
    request.open("GET", layer_url, false);
    request.send(null);

    // Now we have the layer TSV, but we don't have our fancy jQuery TSV
    // parser. Parse it manually.

    // This holds the layer data (signature to float)
    var layer_data = {};

    // This holds the array of lines.
    // Split on newlines (as seen in jQuery.tsv.js).
    var lines = request.responseText.split(/\r?\n/);

    for(var i = 0; i < lines.length; i++) {
        // This holds the parts of each line
        var parts = lines[i].split(/\t/);

        if(parts[0]) {
            // We actually have data
            // Parse the layer value for this signature
            var value = parseFloat(parts[1]);

            // Store the value in the layer data
            layer_data[parts[0]] = value;
        }
    }

    // Run stats on the downloaded data
    return statistics_for_layer(layer_data, in_list, out_list, all_list);
}
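
// For reference, a minimal sketch of the single-layer TSV this parser assumes
// (made-up names and values): one signature name and one value per line,
// separated by a tab.
//
//     hex1    0.5
//     hex2    1.25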

function t_compare(layer_data, in_list, out_list, all_list) {
    // Given the data of a continuous layer object (an object from signature
    // name to float (or undefined)), and arrays of the names of "in" and "out"
    // signatures, do a t test for whether the in signatures differ from the
    // out signatures. Returns an object of metadata with "p_value" set to the
    // two-tailed p value of the test, or NaN if the test cannot be performed
    // (due to, e.g., fewer than 2 samples in one category).

    // Go through the in list and calculate all the summary statistics.
    // How many non-NaN values?
    var number_in = 0;
    // What is the sum?
    var sum_in = 0;

    for(var i = 0; i < in_list.length; i++) {
        if(!isNaN(layer_data[in_list[i]])) {
            number_in++;
            sum_in += layer_data[in_list[i]];
        }
    }

    // We've done one pass, so we know if we actually have any in list data.
    if(number_in < 2) {
        // Not enough to run the t test
        return NaN;
    }

    // What is the mean?
    var mean_in = sum_in / number_in;

    // What is the second moment (sum of squares of differences from the mean)?
    var second_moment_in = 0;
    for(var i = 0; i < in_list.length; i++) {
        if(!isNaN(layer_data[in_list[i]])) {
            second_moment_in += Math.pow(layer_data[in_list[i]] - mean_in, 2);
        }
    }

    // What is the unbiased variance?
    var unbiased_variance_in = second_moment_in / (number_in - 1);

    // Now go through the same process for the out list.
    // How many non-NaN values?
    var number_out = 0;
    // What is the sum?
    var sum_out = 0;

    for(var i = 0; i < out_list.length; i++) {
        if(!isNaN(layer_data[out_list[i]])) {
            number_out++;
            sum_out += layer_data[out_list[i]];
        }
    }

    // We've done one pass, so we know if we actually have any out list data.
    if(number_out < 2) {
        // Not enough to run the t test
        return NaN;
    }

    // What is the mean?
    var mean_out = sum_out / number_out;

    // What is the second moment (sum of squares of differences from the mean)?
    var second_moment_out = 0;
    for(var i = 0; i < out_list.length; i++) {
        if(!isNaN(layer_data[out_list[i]])) {
            second_moment_out += Math.pow(layer_data[out_list[i]] - mean_out,
                2);
        }
    }

    // What is the unbiased variance?
    var unbiased_variance_out = second_moment_out / (number_out - 1);

    // We can't do the test if both variances are 0.
    if(unbiased_variance_in == 0 && unbiased_variance_out == 0) {
        return NaN;
    }

    // Now we can calculate the t test two-tailed p value.
    var p_value = t_test(mean_in, unbiased_variance_in, number_in, mean_out,
        unbiased_variance_out, number_out);

    // And return it in a dict with other metadata.
    // We don't really have any other metadata.
    return {
        p_value: p_value
    };
}

function t_test(mean_in, unbiased_variance_in, number_in, mean_out,
    unbiased_variance_out, number_out) {

    // Given the mean, unbiased variance, and number of samples for both the in
    // group and the out group, compute the p value for the t test with unequal
    // sample sizes and unequal variances, testing to see whether the means
    // differ (a two-tailed "Welch's" t test). See
    // https://en.wikipedia.org/wiki/Student%27s_t-test
    // Assumes we have enough samples to actually perform the test.

    // First, calculate the t statistic, which is where our observations fall
    // on the t distribution.
    var t_statistic = (mean_in - mean_out) / Math.sqrt((unbiased_variance_in /
        number_in) + (unbiased_variance_out / number_out));

    // Calculate the degrees of freedom for the particular t distribution that
    // we ought to compare the statistic against.
    var degrees_of_freedom = Math.pow((unbiased_variance_in / number_in) +
        (unbiased_variance_out / number_out), 2) /
        ((Math.pow(unbiased_variance_in / number_in, 2) / (number_in - 1)) +
        (Math.pow(unbiased_variance_out / number_out, 2) / (number_out - 1)));

    // Now we have to compare the t statistic to the t distribution CDF
    // available via the totally undocumented
    // jstat.pt = function(q, df, ncp, lower_tail, log)
    // where:
    // q is the t statistic value to calculate the CDF at
    // df is the degrees of freedom
    // ncp is the "mu" parameter for the t distribution. I think this sets the
    // mean, and it's OK to leave blank.
    // lower_tail presumably specifies if we want the lower or upper tail of
    // the CDF. Defaults to true.
    // log specifies if we want the log probability. Defaults to false.

    // Make the t statistic be on the low side of the distribution, and
    // calculate the lower tail's area using the CDF.
    var one_tail_probability = jstat.pt(0 - Math.abs(t_statistic),
        degrees_of_freedom);

    // Return the two-tailed p value, which, since the t distribution is
    // symmetric, is just twice the single-tail probability.
    return 2 * one_tail_probability;

}
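
// Worked example (approximate, for orientation only): two groups of 10
// samples each, with means 0 and 1 and unbiased variances of 1, give
// t_test(0, 1, 10, 1, 1, 10) a t statistic of about -2.24 with 18 degrees of
// freedom, so the two-tailed p value should come out around 0.04.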

function binomial_compare(layer_data, in_list, out_list, all_list) {
    // Given the data of a binary layer object (an object from signature name
    // to 0 or 1 (or undefined)), and arrays of the names of "in" and "out"
    // signatures, do a binomial test for whether the in signatures differ from
    // the out signatures. Uses a number of pseudocount trials as specified in
    // the global constant BINOMIAL_PSEUDOCOUNTS. Returns an object of metadata
    // with "p_value" set to the two-tailed p value of the test, or NaN if the
    // test cannot be performed. all_list specifies the names of all signatures
    // that figure into the analysis at all (i.e. those which the user hasn't
    // filtered out), which we use when calculating how many of our
    // pseudocounts should be successes. Signature names appearing in all_list
    // but with no data in layer_data are not counted.

    // Work out the distribution from the out list.
    // How many out signatures are 1?
    var outside_yes = 0;
    // And how many are 0?
    var outside_no = 0;

    for(var i = 0; i < out_list.length; i++) {
        if(layer_data[out_list[i]] === 1) {
            // This is a yes and it's outside.
            outside_yes++;
        } else if(layer_data[out_list[i]] === 0) {
            // A no and outside
            outside_no++;
        }
    }

    // It's OK for all the outside hexes to be 0 now. Pseudocounts can give us
    // a p value.

    // Now work out our pseudocounts.
    // How many signatures in all_list are successes?
    var all_yes = 0;
    // And how many are failures (as opposed to undefined)?
    var all_no = 0;

    for(var i = 0; i < all_list.length; i++) {
        if(layer_data[all_list[i]] === 1) {
            // A yes anywhere
            all_yes++;
        } else if(layer_data[all_list[i]] === 0) {
            // A real no (not a no-data) anywhere
            all_no++;
        }
    }

    // It's not OK for there to be no hexes in the all set. Maybe they filtered
    // out all the ones with any data?
    if(all_yes + all_no == 0) {
        // TODO: Sure wish we had layer names here.
        print("No signatures were available with data for this layer.");
        return NaN;
    }

    // Calculate how many pseudo-yeses we should have.
    // Match the frequency in all signatures.
    var pseudo_yes = BINOMIAL_PSEUDOCOUNTS * (all_yes / (all_yes + all_no));

    // The number of pseudo-trials is just BINOMIAL_PSEUDOCOUNTS.

    // This holds the probability of being a 1 for the out list.
    // We want to test if the in list differs significantly from this.
    var background_probability = (outside_yes + pseudo_yes) / (outside_yes +
        outside_no + BINOMIAL_PSEUDOCOUNTS);

    if(background_probability == 0) {
        // Can't do the binomial test in this case. Somehow there were no yeses
        // anywhere.
        return NaN;
    }

    // How many 1s are in the in list?
    var inside_yes = 0;
    // And how many 0s?
    var inside_no = 0;

    for(var i = 0; i < in_list.length; i++) {
        if(layer_data[in_list[i]] === 1) {
            // This is a yes and it's inside.
            inside_yes++;
        } else if(layer_data[in_list[i]] === 0) {
            // A no and it's inside
            inside_no++;
        }
    }

    // Get the p value for rejecting the null hypothesis that the in
    // signatures follow the background distribution.
    var p = binomial_test(inside_yes + inside_no, inside_yes,
        background_probability);

    if(LOG_SUSPICIOUS && (p == 0 || p == 1)) {
        // We got an odd p value. Complain about it.
        print("Got suspicious p value " + p);
        print("Was binomial test for " + inside_yes + " successes in " +
            (inside_yes + inside_no) + " trials at probability " +
            background_probability);
        print("Background was " + outside_yes + " out of " + (outside_yes +
            outside_no) + " with " + pseudo_yes + " out of " +
            BINOMIAL_PSEUDOCOUNTS + " pseudocounts.");
    }

    // Return our p value as "p_value", and also how many non-pseudocount
    // successes were in the in_list and the out_list.
    return {
        p_value: p,
        inside_yes: inside_yes,
        outside_yes: outside_yes
    };
}
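
// Illustrative pseudocount arithmetic (made-up counts): if 30 of 100
// signatures with data are 1s overall (all_yes = 30, all_no = 70), then
// pseudo_yes = 5 * 0.3 = 1.5. With outside_yes = 3 and outside_no = 7, the
// background probability becomes (3 + 1.5) / (3 + 7 + 5) = 0.3. The
// pseudocounts are what keep this background rate nonzero even when every
// outside hex is 0.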

function binomial_test(trials, successes, success_probability) {
    if(trials < successes) {
        print("Trying to test " + trials + " trials with " + successes +
            " successes!");
    }

    // Return the p value for rejecting the null hypothesis that the observed
    // number of successes happened in the observed number of trials when the
    // probability of success was success_probability. Does a Binomial test.

    // Calculate the p value.
    // This must be terribly complicated since nobody seems to have written up
    // how to do it as anything other than an arcane stats ritual.
    // Something close: http://www.johnmyleswhite.com/notebook/2012/04/14/implementing-the-exact-binomial-test-in-julia/
    // How scipy.stats does it (x = successes, n = trials, p = supposed
    // probability):
    // SourceForge says Scipy is BSD licensed, so we can steal this code for
    // our comments.
    /*
    d = distributions.binom.pmf(x,n,p)
    rerr = 1+1e-7
    if (x < p*n):
        i = np.arange(np.ceil(p*n),n+1)
        y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
        pval = distributions.binom.cdf(x,n,p) + distributions.binom.sf(n-y,
            n,p)
    else:
        i = np.arange(np.floor(p*n))
        y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
        pval = distributions.binom.cdf(y-1,n,p) + distributions.binom.sf(
            x-1,n,p)
    */
    // There is of course no justification given for why this would work.
    // What it's actually doing is a complicated Numpy vectorized operation to
    // find the boundary of the tail we don't have, and then adding the CDF of
    // the lower tail boundary and (1 - CDF) of the upper tail boundary (which
    // is the p value by definition).

    // This holds the probability of exactly what we've observed under the
    // null hypothesis.
    var observed_probability = binomial_pmf(trials, successes,
        success_probability);

    if(successes < trials * success_probability) {
        // We know anything with fewer successes than this is more extreme. But
        // how many successes would we need to have an equally extreme but
        // higher than expected number of successes?
        // We should sum down from all successes. (We'll sum from small to
        // large so it's OK numerically.)

        // This holds the total probability of everything more extremely
        // successful than what we've observed.
        var other_tail_total_probability = 0;

        // TODO: implement some better sort of search thing and use the CDF.
        for(var other_tail_start = trials; other_tail_start >=
            Math.ceil(trials * success_probability); other_tail_start--) {

            // Get the probability for this particular case.
            var case_probability = binomial_pmf(trials, other_tail_start,
                success_probability);

            if(case_probability > observed_probability) {
                // This case is actually less extreme than what we've
                // observed, so our summation is complete.
                break;
            } else {
                // This case is more extreme than what we've observed, so use
                // it.
                other_tail_total_probability += case_probability;
            }
        }

        // This holds the probability in this tail.
        var this_tail_probability = binomial_cdf(trials, successes,
            success_probability);

        // Return the total probability from both tails, clamped to 1.
        return Math.min(this_tail_probability + other_tail_total_probability,
            1.0);
    } else {
        // We know anything with more successes than this is more extreme. But
        // how few successes would we need to have an equally extreme but
        // lower than expected number of successes?
        // We will sum up from 0 successes. We really ought to use the CDF
        // somehow, but I can't think of how we would do it.

        // This holds the total probability of everything more extremely
        // failureful than what we've observed.
        var other_tail_total_probability = 0;

        for(var other_tail_end = 0; other_tail_end <
            Math.floor(trials * success_probability); other_tail_end++) {
            // We only have to iterate up to the peak (most likely) value.

            // Get the probability for this particular case.
            var case_probability = binomial_pmf(trials, other_tail_end,
                success_probability);

            if(case_probability > observed_probability) {
                // This case is actually less extreme than what we've
                // observed, so our summation is complete.
                break;
            } else {
                // This case is more extreme than what we've observed, so use
                // it.
                other_tail_total_probability += case_probability;
            }

        }

        // This holds the probability in this tail. It covers everything at or
        // above the observed number of successes, so even if the tail starts
        // at the highest possible number of successes, it has some
        // probability. successes can't be 0 here (since then we'd be below
        // any nonzero expected probability and take the other branch). Since
        // it's a positive integer, it must be 1 or more, so we can subtract 1
        // safely.
        var this_tail_probability = 1 - binomial_cdf(trials, successes - 1,
            success_probability);

        // Return the total probability from both tails, clamped to 1.
        return Math.min(this_tail_probability + other_tail_total_probability,
            1.0);
    }

}
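
// Sanity check (worked by hand): binomial_test(2, 2, 0.5) takes the else
// branch, sums binomial_pmf(2, 0, 0.5) = 0.25 for the opposite tail, adds
// 1 - binomial_cdf(2, 1, 0.5) = 0.25 for this tail, and returns 0.5, which
// matches the exact two-sided binomial test P(X = 0) + P(X = 2) for
// Binomial(2, 0.5).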

function binomial_cdf(trials, successes, success_probability) {
    // The Binomial distribution's cumulative distribution function. Given a
    // number of trials, a number of successes, and a success probability,
    // return the probability of having observed that many successes or fewer.

    // We compute this efficiently using the "regularized incomplete beta
    // function", AKA the beta distribution CDF, which we get from jStat.
    // See http://en.wikipedia.org/wiki/Binomial_distribution#Cumulative_distribution_function
    // and http://en.wikipedia.org/wiki/Regularized_incomplete_beta_function#Incomplete_beta_function

    if(trials == successes) {
        // jStat doesn't want a 0 alpha for its beta distribution (no
        // failures). Calculate this one by hand (it's easy).
        return 1;
    }

    if(trials < successes) {
        // This should never happen. TODO: Debug when it happens.
        print("Error: trials (" + trials + ") < successes (" + successes +
            ")!");
        return NaN;
    }

    // This is the point at which we want to evaluate the beta distribution
    // CDF.
    var beta_observation = 1 - success_probability;

    // These are the parameters of the relevant beta distribution.
    var beta_alpha = trials - successes;
    var beta_beta = successes + 1;

    // Return the beta distribution CDF value, which happens to also be our
    // CDF.
    return jstat.pbeta(beta_observation, beta_alpha, beta_beta);
}
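
// Sanity check (by hand): binomial_cdf(2, 1, 0.5) evaluates
// jstat.pbeta(0.5, 1, 2) = 1 - (1 - 0.5)^2 = 0.75, which matches
// P(X <= 1) = 0.25 + 0.5 for Binomial(2, 0.5).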

function binomial_pmf(trials, successes, success_probability) {
    // The Binomial distribution's probability mass function. Given a number of
    // trials, a number of successes, and the probability of success on each
    // trial, calculate the probability of observing that many successes in
    // that many trials with the given success rate.

    // The probability of this many successes in this many trials at this
    // success rate is the probability of succeeding so many times and failing
    // so many times, summed over all the mutually exclusive arrangements of
    // successes and failures.
    return (choose(trials, successes) *
        Math.pow(success_probability, successes) *
        Math.pow(1 - success_probability, trials - successes));

}

function choose(available, selected) {
    // The choose function: from available distinct objects, how many ways are
    // there to select selected of them. Returns "available choose selected".
    // Works with large input numbers that are too big to take the factorials
    // of.

    // We use a neat overflow-robust algorithm that eliminates the factorials
    // and makes the computation a multiplication of numbers greater than one.
    // So, no overflow unless the result itself is too big.
    // See http://arantxa.ii.uam.es/~ssantini/writing/notes/s667_binomial.pdf

    if(selected < available - selected) {
        // It would be faster to think about choosing what we don't include.
        // So do that instead.
        return choose(available, available - selected);
    }

    // This holds the result we are accumulating. Initialize to the
    // multiplicative identity.
    var result = 1;

    for(var i = 1; i < available - selected + 1; i++) {
        result *= (1 + (selected / i));
    }

    // TODO: The result ought always to be an integer. Ensure this.
    return result;
}
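
// Sanity check (by hand): choose(5, 2) recurses to choose(5, 3), whose loop
// runs for i = 1..2 and computes (1 + 3/1) * (1 + 3/2) = 4 * 2.5 = 10, which
// matches 5! / (2! * 3!).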