github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/stats/sample.go (about)

     1  // Copyright 2021 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // Package stats provides various statistical operations and algorithms.
     5  package stats
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  )
    11  
    12  // Sample represents a single sample - set of data points collected during an experiment.
    13  type Sample struct {
    14  	Xs     []float64
    15  	Sorted bool
    16  }
    17  
    18  func (s *Sample) Percentile(p float64) float64 {
    19  	s.Sort()
    20  	// The code below is taken from golang.org/x/perf/internal/stats
    21  	// Unfortunately, that package is internal and we cannot just import and use it.
    22  	N := float64(len(s.Xs))
    23  	n := 1/3.0 + p*(N+1/3.0) // R8
    24  	kf, frac := math.Modf(n)
    25  	k := int(kf)
    26  	if k <= 0 {
    27  		return s.Xs[0]
    28  	} else if k >= len(s.Xs) {
    29  		return s.Xs[len(s.Xs)-1]
    30  	}
    31  	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
    32  }
    33  
    34  func (s *Sample) Median() float64 {
    35  	return s.Percentile(0.5)
    36  }
    37  
    38  // Remove outliers by the Tukey's fences method.
    39  func (s *Sample) RemoveOutliers() *Sample {
    40  	if len(s.Xs) < 4 {
    41  		// If the data set is too small, we cannot reliably detect outliers anyway.
    42  		return s.Copy()
    43  	}
    44  	s.Sort()
    45  	Q1 := s.Percentile(0.25)
    46  	Q3 := s.Percentile(0.75)
    47  	minValue := Q1 - 1.5*(Q3-Q1)
    48  	maxValue := Q3 + 1.5*(Q3-Q1)
    49  	xs := []float64{}
    50  	for _, value := range s.Xs {
    51  		if value >= minValue && value <= maxValue {
    52  			xs = append(xs, value)
    53  		}
    54  	}
    55  	return &Sample{
    56  		Xs:     xs,
    57  		Sorted: s.Sorted,
    58  	}
    59  }
    60  
    61  func (s *Sample) Copy() *Sample {
    62  	return &Sample{
    63  		Xs:     append([]float64{}, s.Xs...),
    64  		Sorted: s.Sorted,
    65  	}
    66  }
    67  
    68  func (s *Sample) Sort() {
    69  	if !s.Sorted {
    70  		sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] })
    71  		s.Sorted = true
    72  	}
    73  }