github.com/schollz/clusters@v0.0.0-20221201012527-c6c68863636f/clusters.go (about)

     1  // Package clusters provides abstract definitions of clusterers as well as
     2  // their implementations.
     3  package clusters
     4  
     5  import (
     6  	"math"
     7  )
     8  
     9  // DistanceFunc represents a function for measuring distance
    10  // between n-dimensional vectors.
    11  type DistanceFunc func([]float64, []float64) float64
    12  
    13  // Online represents parameters important for online learning in
    14  // clustering algorithms.
    15  type Online struct {
    16  	Alpha     float64
    17  	Dimension int
    18  }
    19  
    20  // HCEvent represents the intermediate result of computation of hard clustering algorithm
    21  // and are transmitted periodically to the caller during online learning
    22  type HCEvent struct {
    23  	Cluster     int
    24  	Observation []float64
    25  }
    26  
    27  // Clusterer defines the operation of learning
    28  // common for all algorithms
    29  type Clusterer interface {
    30  	Learn([][]float64) error
    31  }
    32  
    33  // HardClusterer defines a set of operations for hard clustering algorithms
    34  type HardClusterer interface {
    35  
    36  	// Sizes returns sizes of respective clusters
    37  	Sizes() []int
    38  
    39  	// Guesses returns mapping from data point indices to cluster numbers. Clusters' numbering begins at 1.
    40  	Guesses() []int
    41  
    42  	// Predict returns number of cluster to which the observation would be assigned
    43  	Predict(observation []float64) int
    44  
    45  	// IsOnline tells the algorithm supports online learning
    46  	IsOnline() bool
    47  
    48  	// WithOnline configures the algorithms for online learning with given parameters
    49  	WithOnline(Online) HardClusterer
    50  
    51  	// Online begins the process of online training of an algorithm. Observations are sent on the observations channel,
    52  	// once no more are expected an empty struct needs to be sent on done channel. Caller receives intermediate results of computation via
    53  	// the returned channel.
    54  	Online(observations chan []float64, done chan struct{}) chan *HCEvent
    55  
    56  	// Implement common operation
    57  	Clusterer
    58  }
    59  
    60  // Estimator defines a computation used to determine an optimal number of clusters in the dataset
    61  type Estimator interface {
    62  
    63  	// Estimate provides an expected number of clusters in the dataset
    64  	Estimate([][]float64) (int, error)
    65  }
    66  
    67  // Importer defines an operation of importing the dataset from an external file
    68  type Importer interface {
    69  
    70  	// Import fetches the data from a file, start and end arguments allow user
    71  	// to specify the span of data columns to be imported (inclusively)
    72  	Import(file string, start, end int) ([][]float64, error)
    73  }
    74  
    75  var (
    76  	// EuclideanDistance is one of the common distance measurement
    77  	EuclideanDistance = func(a, b []float64) float64 {
    78  		var (
    79  			s, t float64
    80  		)
    81  
    82  		for i, _ := range a {
    83  			t = a[i] - b[i]
    84  			s += t * t
    85  		}
    86  
    87  		return math.Sqrt(s)
    88  	}
    89  
    90  	// EuclideanDistanceSquared is one of the common distance measurement
    91  	EuclideanDistanceSquared = func(a, b []float64) float64 {
    92  		var (
    93  			s, t float64
    94  		)
    95  
    96  		for i, _ := range a {
    97  			t = a[i] - b[i]
    98  			s += t * t
    99  		}
   100  
   101  		return s
   102  	}
   103  
   104  	// Manhattan distance captures the distance between two points by 
   105  	// aggregating the pairwise absolute difference between each variable
   106  	ManhattanDistance = func(a, b []float64) float64 {
   107  		var (
   108  			s float64
   109  		)
   110  		
   111  		for i, _ := range a {
   112  			s += math.Abs(a[i]-b[i])
   113  		}
   114  
   115  		return s
   116  	}
   117  )