Negative Binomial
The negative binomial distribution is one of the most important discrete distributions. It models a count with more variability than a Poisson (overdispersion), and it also has a clean “waiting time” interpretation in terms of Bernoulli trials.
Important note (WebPPL)
WebPPL does not provide a built-in primitive constructor named NegativeBinomial.
In this reference we therefore show:
how to sample a negative-binomially distributed value using ordinary WebPPL code, and
how to use the negative binomial as a likelihood via
factor(logp) during inference.
Definition (waiting time)
Fix two parameters:
r: the target number of successes (integer, r >= 1); p: success probability in each Bernoulli trial (real, 0 < p <= 1)
Run independent Bernoulli trials with success probability p until the r-th success occurs.
Let K be the number of failures observed before that r-th success.
Then:
K ~ NB(r, p)
Support: K ∈ {0, 1, 2, ...} (infinite support).
PMF (probability mass function)
For k = 0, 1, 2, ...:
P(K = k) = C(k + r - 1, k) (1 - p)^k p^r
Intuition (combinatorics)
To have exactly k failures before the r-th success:
the final trial must be a success (the r-th success), and
among the first k + r - 1 trials, we need exactly k failures and r - 1 successes.
The number of ways to place k failures among k + r - 1 positions is
C(k+r-1, k). Each such sequence has probability (1-p)^k p^r by independence.
Multiplying gives the PMF above.
Parameterization gotchas
Different sources use different conventions. This page uses:
K = number of failures before the r-th success; p = probability of success in each trial
A common alternative is to return the total number of trials T = K + r.
(So be careful when comparing to libraries that define the negative binomial in terms of T.)
Special case: Geometric
When r = 1, the negative binomial becomes the geometric distribution over
“failures before the first success”: P(K = k) = (1 - p)^k p.
In other words, NB(1, p) is geometric in this parameterization.
How to use it in WebPPL
Sampling (waiting-time simulation)
We can sample K directly by simulating Bernoulli trials until r successes occur.
// Negative binomial as "failures before r-th success" in Bernoulli trials.

var negBinFailures = function(r, p) {
  // Returns the number of failures observed before the r-th success.
  // Base case: no successes remain to wait for, so no further failures accrue.
  if (r === 0) {
    return 0;
  }
  // One Bernoulli trial: a success decrements the remaining-success count;
  // a failure adds 1 to the running failure total.
  return flip(p)
    ? negBinFailures(r - 1, p)
    : 1 + negBinFailures(r, p);
};

var r = 3;
var p = 0.4;

var out = {
  interpretation: "K = #failures before the r-th success",
  r: r,
  p: p,
  samples_K: repeat(8, function() { return negBinFailures(r, p); }),

  // Special case r=1 (geometric in this parameterization)
  geometric_r1_samples: repeat(8, function() { return negBinFailures(1, p); })
};

out;
{
interpretation: 'K = #failures before the r-th success',
r: 3,
p: 0.4,
samples_K: [
3, 4, 6, 2,
6, 5, 5, 11
],
geometric_r1_samples: [
1, 0, 1, 1,
0, 1, 6, 3
]
}
Scoring / likelihood via factor
WebPPL supports conditioning using factor(score), which adds score to the log probability
of the current execution. This lets us treat the negative binomial as a likelihood if we implement
its log-PMF.
// 1) Implement log PMF for NB(r,p) where K = #failures before r-th success.
// 2) Convert score (log prob) to prob via exp.
// 3) Use factor(logp) to do inference over p on a small discrete grid.

// log(n!) computed recursively; log(0!) = log(1!) = 0.
var logFactorial = function(n) {
  return (n <= 1) ? 0 : (Math.log(n) + logFactorial(n - 1));
};

// log C(n, k) = log n! - log k! - log (n-k)!
var logChoose = function(n, k) {
  return logFactorial(n) - logFactorial(k) - logFactorial(n - k);
};

// log P(K = k) for K ~ NB(r, p), with K = #failures before the r-th success:
//   log C(k+r-1, k) + k*log(1-p) + r*log(p)
// Guard the failure term: the stated domain allows p = 1, where the naive
// product k * Math.log(1 - p) is 0 * (-Infinity) = NaN for k = 0, although
// the correct contribution of zero failures is 0.
var negBinLogPmf = function(k, r, p) {
  var failTerm = (k === 0) ? 0 : k * Math.log(1 - p);
  return logChoose(k + r - 1, k) + failTerm + r * Math.log(p);
};
18
var r = 3;
var p = 0.4;

// Tabulate a few exact point probabilities derived from the log-PMF.
var ks = [0, 1, 2, 3, 4, 5, 10];
var rows = map(function(k) {
  var logProb = negBinLogPmf(k, r, p);
  return {k: k, logp: logProb, prob: Math.exp(logProb)};
}, ks);
28
// CDF/tail examples without any truncation (finite sums):
// Inclusive integer range [a, b]; empty when a > b.
var range = function(a, b) {
  if (a > b) {
    return [];
  }
  return [a].concat(range(a + 1, b));
};
33
// Exponentiate the log-PMF to get point probabilities for finite sums.
var pmf = function(k) {
  var lp = negBinLogPmf(k, r, p);
  return Math.exp(lp);
};

var k0 = 5;

// CDF and tail probabilities via finite sums (support starts at 0):
var pLe = sum(map(pmf, range(0, k0)));     // P(K <= k0)
var pLt = sum(map(pmf, range(0, k0 - 1))); // P(K < k0): sum up to k0 - 1
var pGe = 1 - pLt;                         // P(K >= k0) = 1 - P(K < k0)
var pGt = 1 - pLe;                         // P(K > k0)  = 1 - P(K <= k0)
41
// Posterior over p given an observed k using factor(logp):
var kObs = 5;
var grid = [0.2, 0.4, 0.6, 0.8];

var model = function() {
  // Uniform prior over the candidate grid of success probabilities.
  var pHyp = sample(Categorical({vs: grid}));
  // Weight this execution by the NB log-likelihood of the observed count.
  factor(negBinLogPmf(kObs, r, pHyp));
  return pHyp;
};

var posterior = Infer({method: "enumerate", model: model});

// Recover the posterior probabilities from log scores.
var supp = posterior.support();
var probs = map(function(v) { return Math.exp(posterior.score(v)); }, supp);
56
// Assemble everything into a single result object for display.
var out = {
  params: {r: r, p: p},
  selected_pmf_values: rows,

  cdf_and_tails_at_k: k0,
  p_le_k: pLe,
  p_ge_k: pGe,
  p_gt_k: pGt,

  inference_example: {
    observed_k: kObs,
    p_grid: grid,
    posterior_support: supp,
    posterior_probs: probs,
    sum: sum(probs)  // sanity check: should be ~1
  }
};

out;
{
params: { r: 3, p: 0.4 },
selected_pmf_values: [
{ k: 0, logp: -2.748872195622465, prob: 0.06400000000000002 },
{ k: 1, logp: -2.161085530720346, prob: 0.11520000000000004 },
{ k: 2, logp: -1.9787639739263918, prob: 0.13823999999999997 },
{ k: 3, logp: -1.9787639739263916, prob: 0.13824 },
{ k: 4, logp: -2.0841244895842173, prob: 0.12441600000000008 },
{ k: 5, logp: -2.258477876728995, prob: 0.10450944000000009 },
{ k: 10, logp: -3.6674736912559465, prob: 0.0255409127424 }
],
cdf_and_tails_at_k: 5,
p_le_k: 0.6846054400000002,
p_ge_k: 0.41990399999999983,
p_gt_k: 0.31539455999999977,
inference_example: {
observed_k: 5,
p_grid: [ 0.2, 0.4, 0.6, 0.8 ],
posterior_support: [ 0.8, 0.6, 0.4, 0.2 ],
posterior_probs: [
0.016427104722792598,
0.22176591375770008,
0.4989733059548254,
0.2628336755646817
],
sum: 0.9999999999999998
}
}
Alternative construction: Gamma–Poisson mixture
The negative binomial can also be seen as a Poisson distribution whose rate is gamma-distributed. This is one reason it is widely used for overdispersed count data.
In the (r, p) parameterization used above, one convenient mapping is: lambda ~ Gamma(shape = r, scale = (1 - p) / p), then K | lambda ~ Poisson(lambda).
Then the marginal distribution of K is NB(r, p).
// Negative binomial as a Gamma–Poisson mixture (overdispersed counts).

// Arithmetic mean of a list of numbers.
var mean = function(xs) {
  return sum(xs) / xs.length;
};

// Unbiased sample variance (divides by n - 1).
var variance = function(xs) {
  var m = mean(xs);
  var squaredDevs = map(function(x) { return (x - m) * (x - m); }, xs);
  return sum(squaredDevs) / (xs.length - 1);
};
12
var r = 3;
var p = 0.4;

// Gamma–Poisson construction for NB(r,p) on k = 0,1,2,...:
// lambda ~ Gamma(shape = r, scale = (1-p)/p), then K | lambda ~ Poisson(lambda).
var theta = (1 - p) / p; // scale of the mixing Gamma
var sampleGammaPoissonNB = function(r, p) {
  var rate = sample(Gamma({shape: r, scale: (1 - p) / p}));
  return sample(Poisson({mu: rate}));
};

// Waiting-time sampler (same NB parameterization as earlier on this page).
var negBinFailures = function(r, p) {
  if (r === 0) {
    return 0;
  }
  return flip(p) ? negBinFailures(r - 1, p) : 1 + negBinFailures(r, p);
};
29
var N = 200;
var sWait = repeat(N, function() { return negBinFailures(r, p); });
var sMix = repeat(N, function() { return sampleGammaPoissonNB(r, p); });

// Compare empirical moments of the two constructions; both target NB(3, 0.4),
// whose mean is r(1-p)/p = 4.5 and variance is r(1-p)/p^2 = 11.25.
var out = {
  params: {r: r, p: p, gamma_scale_theta: theta},
  waiting_time: {mean: mean(sWait), var: variance(sWait), first10: sWait.slice(0, 10)},
  gamma_poisson: {mean: mean(sMix), var: variance(sMix), first10: sMix.slice(0, 10)}
};

out;
{
params: { r: 3, p: 0.4, gamma_scale_theta: 1.4999999999999998 },
waiting_time: {
mean: 4.72,
var: 11.860904522613069,
first10: [
3, 4, 6, 2, 6,
5, 5, 11, 2, 2
]
},
gamma_poisson: {
mean: 4.3,
var: 11.778894472361808,
first10: [
3, 2, 7, 6, 5,
2, 1, 2, 2, 0
]
}
}