gonum.org/v1/gonum@v0.14.0/stat/distuv/statdist.go

gonum.org/v1/gonum@v0.14.0/stat/distuv/statdist.go (about)

     1  // Copyright ©2018 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package distuv
     6  
     7  import (
     8  	"math"
     9  
    10  	"gonum.org/v1/gonum/mathext"
    11  )
    12  
    13  // Bhattacharyya is a type for computing the Bhattacharyya distance between
    14  // probability distributions.
    15  //
    16  // The Bhattacharyya distance is defined as
    17  //
    18  //	D_B = -ln(BC(l,r))
    19  //	BC = \int_-∞^∞ (p(x)q(x))^(1/2) dx
    20  //
    21  // Where BC is known as the Bhattacharyya coefficient.
    22  // The Bhattacharyya distance is related to the Hellinger distance by
    23  //
    24  //	H(l,r) = sqrt(1-BC(l,r))
    25  //
    26  // For more information, see
    27  //
    28  //	https://en.wikipedia.org/wiki/Bhattacharyya_distance
    29  type Bhattacharyya struct{}
    30  
    31  // DistBeta returns the Bhattacharyya distance between Beta distributions l and r.
    32  // For Beta distributions, the Bhattacharyya distance is given by
    33  //
    34  //	-ln(B((α_l + α_r)/2, (β_l + β_r)/2) / (B(α_l,β_l), B(α_r,β_r)))
    35  //
    36  // Where B is the Beta function.
    37  func (Bhattacharyya) DistBeta(l, r Beta) float64 {
    38  	// Reference: https://en.wikipedia.org/wiki/Hellinger_distance#Examples
    39  	return -mathext.Lbeta((l.Alpha+r.Alpha)/2, (l.Beta+r.Beta)/2) +
    40  		0.5*mathext.Lbeta(l.Alpha, l.Beta) + 0.5*mathext.Lbeta(r.Alpha, r.Beta)
    41  }
    42  
    43  // DistNormal returns the Bhattacharyya distance Normal distributions l and r.
    44  // For Normal distributions, the Bhattacharyya distance is given by
    45  //
    46  //	s = (σ_l^2 + σ_r^2)/2
    47  //	BC = 1/8 (μ_l-μ_r)^2/s + 1/2 ln(s/(σ_l*σ_r))
    48  func (Bhattacharyya) DistNormal(l, r Normal) float64 {
    49  	// Reference: https://en.wikipedia.org/wiki/Bhattacharyya_distance
    50  	m := l.Mu - r.Mu
    51  	s := (l.Sigma*l.Sigma + r.Sigma*r.Sigma) / 2
    52  	return 0.125*m*m/s + 0.5*math.Log(s) - 0.5*math.Log(l.Sigma) - 0.5*math.Log(r.Sigma)
    53  }
    54  
    55  // Hellinger is a type for computing the Hellinger distance between probability
    56  // distributions.
    57  //
    58  // The Hellinger distance is defined as
    59  //
    60  //	H^2(l,r) = 1/2 * int_x (\sqrt(l(x)) - \sqrt(r(x)))^2 dx
    61  //
    62  // and is bounded between 0 and 1. Note the above formula defines the squared
    63  // Hellinger distance, while this returns the Hellinger distance itself.
    64  // The Hellinger distance is related to the Bhattacharyya distance by
    65  //
    66  //	H^2 = 1 - exp(-D_B)
    67  //
    68  // For more information, see
    69  //
    70  //	https://en.wikipedia.org/wiki/Hellinger_distance
    71  type Hellinger struct{}
    72  
    73  // DistBeta computes the Hellinger distance between Beta distributions l and r.
    74  // See the documentation of Bhattacharyya.DistBeta for the distance formula.
    75  func (Hellinger) DistBeta(l, r Beta) float64 {
    76  	db := Bhattacharyya{}.DistBeta(l, r)
    77  	return math.Sqrt(-math.Expm1(-db))
    78  }
    79  
    80  // DistNormal computes the Hellinger distance between Normal distributions l and r.
    81  // See the documentation of Bhattacharyya.DistNormal for the distance formula.
    82  func (Hellinger) DistNormal(l, r Normal) float64 {
    83  	db := Bhattacharyya{}.DistNormal(l, r)
    84  	return math.Sqrt(-math.Expm1(-db))
    85  }
    86  
    87  // KullbackLeibler is a type for computing the Kullback-Leibler divergence from l to r.
    88  //
    89  // The Kullback-Leibler divergence is defined as
    90  //
    91  //	D_KL(l || r ) = \int_x p(x) log(p(x)/q(x)) dx
    92  //
    93  // Note that the Kullback-Leibler divergence is not symmetric with respect to
    94  // the order of the input arguments.
    95  type KullbackLeibler struct{}
    96  
    97  // DistBeta returns the Kullback-Leibler divergence between Beta distributions
    98  // l and r.
    99  //
   100  // For two Beta distributions, the KL divergence is computed as
   101  //
   102  //	D_KL(l || r) =  log Γ(α_l+β_l) - log Γ(α_l) - log Γ(β_l)
   103  //	                - log Γ(α_r+β_r) + log Γ(α_r) + log Γ(β_r)
   104  //	                + (α_l-α_r)(ψ(α_l)-ψ(α_l+β_l)) + (β_l-β_r)(ψ(β_l)-ψ(α_l+β_l))
   105  //
   106  // Where Γ is the gamma function and ψ is the digamma function.
   107  func (KullbackLeibler) DistBeta(l, r Beta) float64 {
   108  	// http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
   109  	if l.Alpha <= 0 || l.Beta <= 0 {
   110  		panic("distuv: bad parameters for left distribution")
   111  	}
   112  	if r.Alpha <= 0 || r.Beta <= 0 {
   113  		panic("distuv: bad parameters for right distribution")
   114  	}
   115  	lab := l.Alpha + l.Beta
   116  	l1, _ := math.Lgamma(lab)
   117  	l2, _ := math.Lgamma(l.Alpha)
   118  	l3, _ := math.Lgamma(l.Beta)
   119  	lt := l1 - l2 - l3
   120  
   121  	r1, _ := math.Lgamma(r.Alpha + r.Beta)
   122  	r2, _ := math.Lgamma(r.Alpha)
   123  	r3, _ := math.Lgamma(r.Beta)
   124  	rt := r1 - r2 - r3
   125  
   126  	d0 := mathext.Digamma(l.Alpha + l.Beta)
   127  	ct := (l.Alpha-r.Alpha)*(mathext.Digamma(l.Alpha)-d0) + (l.Beta-r.Beta)*(mathext.Digamma(l.Beta)-d0)
   128  
   129  	return lt - rt + ct
   130  }
   131  
   132  // DistNormal returns the Kullback-Leibler divergence between Normal distributions
   133  // l and r.
   134  //
   135  // For two Normal distributions, the KL divergence is computed as
   136  //
   137  //	D_KL(l || r) = log(σ_r / σ_l) + (σ_l^2 + (μ_l-μ_r)^2)/(2 * σ_r^2) - 0.5
   138  func (KullbackLeibler) DistNormal(l, r Normal) float64 {
   139  	d := l.Mu - r.Mu
   140  	v := (l.Sigma*l.Sigma + d*d) / (2 * r.Sigma * r.Sigma)
   141  	return math.Log(r.Sigma) - math.Log(l.Sigma) + v - 0.5
   142  }