github.com/agnivade/pgm@v0.0.0-20210528073050-e2df0d9cb72d/pgm_index.go (about)

     1  package pgm
     2  
     3  import (
     4  	"errors"
     5  	"math"
     6  )
     7  
     8  // Index contains the PGM index.
     9  // It is just a hierarchy of segments.
    10  type Index struct {
    11  	levels  [][]Segment
    12  	epsilon int
    13  	length  int // length of the array.
    14  }
    15  
    16  // Segment contains the slope and intercept of the line segment
    17  // and the corresponding key.
    18  type Segment struct {
    19  	key       float64
    20  	slope     float64
    21  	intercept float64
    22  }
    23  
    24  type ApproxPos struct {
    25  	Pos int
    26  	Lo  int
    27  	Hi  int
    28  }
    29  
    30  type point struct {
    31  	x float64
    32  	y float64
    33  }
    34  
    35  // NewIndex returns a new static PGM index.
    36  func NewIndex(input []float64, epsilon int) *Index {
    37  	index := &Index{
    38  		levels:  make([][]Segment, 0),
    39  		epsilon: epsilon,
    40  		length:  len(input),
    41  	}
    42  	keys := make([]float64, len(input))
    43  	copy(keys, input)
    44  	// repeat
    45  	// M = Build-PLA-model(keys, ε)
    46  	// levels[i] = M; i = i + 1
    47  	// m = Size(M)
    48  	// keys = [M[0].key, . . . , M[m − 1].key]
    49  	// until m = 1
    50  	for {
    51  		model := buildPLAModel(keys, epsilon) // returns a single level
    52  		index.levels = append(index.levels, model)
    53  
    54  		// Re-create keys for the next level.
    55  		keys = keys[:0] // wiping
    56  		for _, seg := range model {
    57  			keys = append(keys, seg.key)
    58  		}
    59  
    60  		if len(model) == 1 {
    61  			break
    62  		}
    63  	}
    64  
    65  	return index
    66  }
    67  
    68  func (ind *Index) Search(k float64) (ApproxPos, error) {
    69  	if len(ind.levels) < 1 || len(ind.levels[len(ind.levels)-1]) != 1 {
    70  		return ApproxPos{}, errors.New("invalid index")
    71  	}
    72  	root := ind.levels[len(ind.levels)-1][0]
    73  	pos := computePos(k, root)
    74  
    75  	for i := len(ind.levels) - 2; i >= 0; i-- {
    76  		lo := max(pos-ind.epsilon, 0)
    77  		hi := min(pos+ind.epsilon, len(ind.levels[i])-1)
    78  
    79  		var s, t Segment
    80  		// The rightmost segment s' in levels[i][lo, hi] such that s'.key ≤ k
    81  		for j, item := range ind.levels[i][lo : hi+1] {
    82  			if item.key <= k {
    83  				continue
    84  			}
    85  			s = ind.levels[i][max(lo+j-1, 0)]
    86  			t = item
    87  		}
    88  		if s.key == 0 {
    89  			s = ind.levels[i][hi]
    90  			t = s
    91  		}
    92  		pos = min(computePos(k, s), computePos(t.key, t))
    93  	}
    94  	// We pad the results by one to account
    95  	// for the inaccuracies created by rounding off.
    96  	// This needs to be fixed.
    97  	lo := max(pos-ind.epsilon-1, 0)
    98  	hi := min(pos+ind.epsilon+1, ind.length-1)
    99  	return ApproxPos{Lo: lo, Hi: hi, Pos: pos}, nil
   100  }
   101  
   102  func computePos(k float64, s Segment) int {
   103  	// TODO: The rounding off introduces inaccuracies.
   104  	// We need to use integers all the way.
   105  	return int(math.Round(k*s.slope + s.intercept))
   106  }
   107  
   108  func buildPLAModel(keys []float64, epsilon int) []Segment {
   109  	model := []Segment{}
   110  	temp := []point{}
   111  	for i := 0; i < len(keys); i++ {
   112  		temp = append(temp, point{x: keys[i], y: float64(i)})
   113  		if len(temp) < 3 {
   114  			continue
   115  		}
   116  		hull := buildHull(temp)
   117  		r := hull.getSmallestRectangle()
   118  		h := r.getHeight()
   119  
   120  		if h > float64(2*epsilon) {
   121  			slope, intercept := r.slopeAndIntercept()
   122  			// Add to model
   123  			model = append(model, Segment{key: temp[0].x, slope: slope, intercept: intercept})
   124  			// Empty convex hull
   125  			temp = temp[:0]
   126  			i--
   127  		}
   128  	}
   129  	if len(temp) == 1 {
   130  		model = append(model, Segment{key: temp[0].x, slope: 1, intercept: temp[0].y - temp[0].x})
   131  	} else if len(temp) > 1 {
   132  		r := buildHull(temp).getSmallestRectangle()
   133  		slope, intercept := r.slopeAndIntercept()
   134  		model = append(model, Segment{key: temp[0].x, slope: slope, intercept: intercept})
   135  	}
   136  	return model
   137  }
   138  
   139  func max(x, y int) int {
   140  	if x > y {
   141  		return x
   142  	}
   143  	return y
   144  }
   145  
   146  func min(x, y int) int {
   147  	if x < y {
   148  		return x
   149  	}
   150  	return y
   151  }