gonum.org/v1/gonum@v0.14.0/stat/distuv/statdist.go

// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package distuv

import (
	"math"

	"gonum.org/v1/gonum/mathext"
)

// Bhattacharyya is a type for computing the Bhattacharyya distance between
// probability distributions.
//
// The Bhattacharyya distance is defined as
//
//	D_B = -ln(BC(l,r))
//	BC = \int_-∞^∞ (l(x)r(x))^(1/2) dx
//
// Where BC is known as the Bhattacharyya coefficient.
// The Bhattacharyya distance is related to the Hellinger distance by
//
//	H(l,r) = sqrt(1-BC(l,r))
//
// For more information, see
//
//	https://en.wikipedia.org/wiki/Bhattacharyya_distance
type Bhattacharyya struct{}

// DistBeta returns the Bhattacharyya distance between Beta distributions l and r.
// For Beta distributions, the Bhattacharyya distance is given by
//
//	-ln(B((α_l + α_r)/2, (β_l + β_r)/2) / sqrt(B(α_l,β_l) * B(α_r,β_r)))
//
// Where B is the Beta function.
func (Bhattacharyya) DistBeta(l, r Beta) float64 {
	// Reference: https://en.wikipedia.org/wiki/Hellinger_distance#Examples
	return -mathext.Lbeta((l.Alpha+r.Alpha)/2, (l.Beta+r.Beta)/2) +
		0.5*mathext.Lbeta(l.Alpha, l.Beta) + 0.5*mathext.Lbeta(r.Alpha, r.Beta)
}

// DistNormal returns the Bhattacharyya distance between Normal distributions l and r.
// For Normal distributions, the Bhattacharyya distance is given by
//
//	s = (σ_l^2 + σ_r^2)/2
//	D_B = 1/8 (μ_l-μ_r)^2/s + 1/2 ln(s/(σ_l*σ_r))
func (Bhattacharyya) DistNormal(l, r Normal) float64 {
	// Reference: https://en.wikipedia.org/wiki/Bhattacharyya_distance
	m := l.Mu - r.Mu
	s := (l.Sigma*l.Sigma + r.Sigma*r.Sigma) / 2
	return 0.125*m*m/s + 0.5*math.Log(s) - 0.5*math.Log(l.Sigma) - 0.5*math.Log(r.Sigma)
}

// Hellinger is a type for computing the Hellinger distance between probability
// distributions.
//
// The Hellinger distance is defined as
//
//	H^2(l,r) = 1/2 * \int_x (\sqrt(l(x)) - \sqrt(r(x)))^2 dx
//
// and is bounded between 0 and 1. Note that the above formula defines the squared
// Hellinger distance, while the methods of this type return the Hellinger distance itself.
// The Hellinger distance is related to the Bhattacharyya distance by
//
//	H^2 = 1 - exp(-D_B)
//
// For more information, see
//
//	https://en.wikipedia.org/wiki/Hellinger_distance
type Hellinger struct{}

// DistBeta computes the Hellinger distance between Beta distributions l and r.
// See the documentation of Bhattacharyya.DistBeta for the distance formula.
func (Hellinger) DistBeta(l, r Beta) float64 {
	db := Bhattacharyya{}.DistBeta(l, r)
	return math.Sqrt(-math.Expm1(-db))
}

// DistNormal computes the Hellinger distance between Normal distributions l and r.
// See the documentation of Bhattacharyya.DistNormal for the distance formula.
func (Hellinger) DistNormal(l, r Normal) float64 {
	db := Bhattacharyya{}.DistNormal(l, r)
	return math.Sqrt(-math.Expm1(-db))
}
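// The relation H^2 = 1 - exp(-D_B) documented above can be checked numerically.
// The function below is a minimal illustrative sketch and is not part of the
// upstream file; its name is hypothetical, and in the real repository such
// examples would live in an external _test.go file.
func exampleHellingerFromBhattacharyya() (h, hFromDB float64) {
	l := Normal{Mu: 0, Sigma: 1}
	r := Normal{Mu: 1, Sigma: 2}

	// Hellinger distance computed directly.
	h = Hellinger{}.DistNormal(l, r)

	// Hellinger distance recovered from the Bhattacharyya distance via
	// H = sqrt(1 - exp(-D_B)); the two values agree to floating-point accuracy.
	db := Bhattacharyya{}.DistNormal(l, r)
	hFromDB = math.Sqrt(1 - math.Exp(-db))

	return h, hFromDB
}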
// KullbackLeibler is a type for computing the Kullback-Leibler divergence from l to r.
//
// The Kullback-Leibler divergence is defined as
//
//	D_KL(l || r) = \int_x l(x) log(l(x)/r(x)) dx
//
// Note that the Kullback-Leibler divergence is not symmetric with respect to
// the order of the input arguments.
type KullbackLeibler struct{}

// DistBeta returns the Kullback-Leibler divergence between Beta distributions
// l and r.
//
// For two Beta distributions, the KL divergence is computed as
//
//	D_KL(l || r) =  log Γ(α_l+β_l) - log Γ(α_l) - log Γ(β_l)
//	              - log Γ(α_r+β_r) + log Γ(α_r) + log Γ(β_r)
//	              + (α_l-α_r)(ψ(α_l)-ψ(α_l+β_l)) + (β_l-β_r)(ψ(β_l)-ψ(α_l+β_l))
//
// Where Γ is the gamma function and ψ is the digamma function.
func (KullbackLeibler) DistBeta(l, r Beta) float64 {
	// http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
	if l.Alpha <= 0 || l.Beta <= 0 {
		panic("distuv: bad parameters for left distribution")
	}
	if r.Alpha <= 0 || r.Beta <= 0 {
		panic("distuv: bad parameters for right distribution")
	}
	lab := l.Alpha + l.Beta
	l1, _ := math.Lgamma(lab)
	l2, _ := math.Lgamma(l.Alpha)
	l3, _ := math.Lgamma(l.Beta)
	lt := l1 - l2 - l3

	r1, _ := math.Lgamma(r.Alpha + r.Beta)
	r2, _ := math.Lgamma(r.Alpha)
	r3, _ := math.Lgamma(r.Beta)
	rt := r1 - r2 - r3

	d0 := mathext.Digamma(l.Alpha + l.Beta)
	ct := (l.Alpha-r.Alpha)*(mathext.Digamma(l.Alpha)-d0) + (l.Beta-r.Beta)*(mathext.Digamma(l.Beta)-d0)

	return lt - rt + ct
}

// DistNormal returns the Kullback-Leibler divergence between Normal distributions
// l and r.
//
// For two Normal distributions, the KL divergence is computed as
//
//	D_KL(l || r) = log(σ_r / σ_l) + (σ_l^2 + (μ_l-μ_r)^2)/(2 * σ_r^2) - 0.5
func (KullbackLeibler) DistNormal(l, r Normal) float64 {
	d := l.Mu - r.Mu
	v := (l.Sigma*l.Sigma + d*d) / (2 * r.Sigma * r.Sigma)
	return math.Log(r.Sigma) - math.Log(l.Sigma) + v - 0.5
}
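// Because D_KL is not symmetric, swapping the arguments generally changes the
// result. The function below is an illustrative sketch only, not part of the
// upstream file; the name is hypothetical and such examples would normally
// live in an external _test.go file.
func exampleKullbackLeiblerAsymmetry() (forward, reverse float64) {
	l := Beta{Alpha: 2, Beta: 5}
	r := Beta{Alpha: 3, Beta: 3}

	kl := KullbackLeibler{}
	// D_KL(l || r) and D_KL(r || l) are both non-negative but generally unequal.
	forward = kl.DistBeta(l, r)
	reverse = kl.DistBeta(r, l)
	return forward, reverse
}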