github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/findflakes/adtest.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "errors" 9 "fmt" 10 "math" 11 "sort" 12 ) 13 14 var ( 15 ErrSampleSize = errors.New("sample is too small") 16 ) 17 18 type SampleValueError struct { 19 Value int 20 Detail string 21 } 22 23 func (e *SampleValueError) Error() string { 24 return e.Detail 25 } 26 27 type AndersonDarlingTestResult struct { 28 // A2 is the Anderson-Darling test statistic, A², for the 29 // goodness of fit of the sample to the probability 30 // distribution. 31 A2 float64 32 33 // P is the p-value for this test. A small value of P 34 // indicates a significant difference between the sample and 35 // the distribution. 36 P float64 37 } 38 39 // AndersonDarlingTest performs an Anderson-Darling goodness-of-fit 40 // test for whether a sample comes from a population with a specified 41 // distribution. It tests the null hypothesis that sample follows dist 42 // against the alternate hypothesis that sample does not follow dist. 43 // 44 // Note that this uses a Monte Carlo method (parametric bootstrap) to 45 // estimate the distribution of the test statistic and hence the exact 46 // P value may vary slightly between calls with the same sample and 47 // distribution. 48 func AndersonDarlingTest(sample []int, dist *GeometricDist) (*AndersonDarlingTestResult, error) { 49 if len(sample) == 0 { 50 return nil, ErrSampleSize 51 } 52 53 if !sort.IntsAreSorted(sample) { 54 sample = append([]int(nil), sample...) 55 sort.Ints(sample) 56 } 57 58 A2, err := andersonDarling(sample, dist) 59 if err != nil { 60 return nil, err 61 } 62 63 // Use parametric bootstrap to estimate the distribution of 64 // A². 65 const resamples = 1000 66 nsample := make([]int, len(sample)) 67 ngreater := 0 68 for i := 0; i < resamples; i++ { 69 for j := range nsample { 70 nsample[j] = dist.Rand() 71 } 72 sort.Ints(nsample) 73 nA2, err := andersonDarling(nsample, dist) 74 if err != nil { 75 return nil, err 76 } 77 if nA2 >= A2 { 78 ngreater++ 79 } 80 } 81 p := float64(ngreater) / resamples 82 83 return &AndersonDarlingTestResult{A2, p}, nil 84 } 85 86 // andersonDarling returns the Anderson-Darling test statistic, A², 87 // for the goodness of fit of sample to dist. 88 // 89 // sample must be sorted. 90 func andersonDarling(sample []int, dist *GeometricDist) (float64, error) { 91 sum := 0.0 92 // TODO: Rearrange terms so we don't have to compute each 93 // sample's CDF twice. 94 for i, y1 := range sample { 95 y2 := sample[len(sample)-i-1] 96 cdf1, sf2 := dist.CDF(y1), dist.SF(y2) 97 if cdf1 == 0 { 98 return 0, &SampleValueError{ 99 Value: y1, 100 Detail: fmt.Sprintf("sample %d lies outside support of expected distribution %v", y1, dist), 101 } 102 } 103 if sf2 == 0 { 104 return 0, &SampleValueError{ 105 Value: y2, 106 Detail: fmt.Sprintf("sample %d lies outside support of expected distribution %v", y2, dist), 107 } 108 } 109 sum += float64(2*i-1) * (math.Log(cdf1) + math.Log(sf2)) 110 } 111 return -float64(len(sample)) - sum/float64(len(sample)), nil 112 }