github.com/agnivade/pgm@v0.0.0-20210528073050-e2df0d9cb72d/pgm_index.go (about) 1 package pgm 2 3 import ( 4 "errors" 5 "math" 6 ) 7 8 // Index contains the PGM index. 9 // It is just a hierarchy of segments. 10 type Index struct { 11 levels [][]Segment 12 epsilon int 13 length int // length of the array. 14 } 15 16 // Segment contains the slope and intercept of the line segment 17 // and the corresponding key. 18 type Segment struct { 19 key float64 20 slope float64 21 intercept float64 22 } 23 24 type ApproxPos struct { 25 Pos int 26 Lo int 27 Hi int 28 } 29 30 type point struct { 31 x float64 32 y float64 33 } 34 35 // NewIndex returns a new static PGM index. 36 func NewIndex(input []float64, epsilon int) *Index { 37 index := &Index{ 38 levels: make([][]Segment, 0), 39 epsilon: epsilon, 40 length: len(input), 41 } 42 keys := make([]float64, len(input)) 43 copy(keys, input) 44 // repeat 45 // M = Build-PLA-model(keys, ε) 46 // levels[i] = M; i = i + 1 47 // m = Size(M) 48 // keys = [M[0].key, . . . , M[m − 1].key] 49 // until m = 1 50 for { 51 model := buildPLAModel(keys, epsilon) // returns a single level 52 index.levels = append(index.levels, model) 53 54 // Re-create keys for the next level. 55 keys = keys[:0] // wiping 56 for _, seg := range model { 57 keys = append(keys, seg.key) 58 } 59 60 if len(model) == 1 { 61 break 62 } 63 } 64 65 return index 66 } 67 68 func (ind *Index) Search(k float64) (ApproxPos, error) { 69 if len(ind.levels) < 1 || len(ind.levels[len(ind.levels)-1]) != 1 { 70 return ApproxPos{}, errors.New("invalid index") 71 } 72 root := ind.levels[len(ind.levels)-1][0] 73 pos := computePos(k, root) 74 75 for i := len(ind.levels) - 2; i >= 0; i-- { 76 lo := max(pos-ind.epsilon, 0) 77 hi := min(pos+ind.epsilon, len(ind.levels[i])-1) 78 79 var s, t Segment 80 // The rightmost segment s' in levels[i][lo, hi] such that s'.key ≤ k 81 for j, item := range ind.levels[i][lo : hi+1] { 82 if item.key <= k { 83 continue 84 } 85 s = ind.levels[i][max(lo+j-1, 0)] 86 t = item 87 } 88 if s.key == 0 { 89 s = ind.levels[i][hi] 90 t = s 91 } 92 pos = min(computePos(k, s), computePos(t.key, t)) 93 } 94 // We pad the results by one to account 95 // for the inaccuracies created by rounding off. 96 // This needs to be fixed. 97 lo := max(pos-ind.epsilon-1, 0) 98 hi := min(pos+ind.epsilon+1, ind.length-1) 99 return ApproxPos{Lo: lo, Hi: hi, Pos: pos}, nil 100 } 101 102 func computePos(k float64, s Segment) int { 103 // TODO: The rounding off introduces inaccuracies. 104 // We need to use integers all the way. 105 return int(math.Round(k*s.slope + s.intercept)) 106 } 107 108 func buildPLAModel(keys []float64, epsilon int) []Segment { 109 model := []Segment{} 110 temp := []point{} 111 for i := 0; i < len(keys); i++ { 112 temp = append(temp, point{x: keys[i], y: float64(i)}) 113 if len(temp) < 3 { 114 continue 115 } 116 hull := buildHull(temp) 117 r := hull.getSmallestRectangle() 118 h := r.getHeight() 119 120 if h > float64(2*epsilon) { 121 slope, intercept := r.slopeAndIntercept() 122 // Add to model 123 model = append(model, Segment{key: temp[0].x, slope: slope, intercept: intercept}) 124 // Empty convex hull 125 temp = temp[:0] 126 i-- 127 } 128 } 129 if len(temp) == 1 { 130 model = append(model, Segment{key: temp[0].x, slope: 1, intercept: temp[0].y - temp[0].x}) 131 } else if len(temp) > 1 { 132 r := buildHull(temp).getSmallestRectangle() 133 slope, intercept := r.slopeAndIntercept() 134 model = append(model, Segment{key: temp[0].x, slope: slope, intercept: intercept}) 135 } 136 return model 137 } 138 139 func max(x, y int) int { 140 if x > y { 141 return x 142 } 143 return y 144 } 145 146 func min(x, y int) int { 147 if x < y { 148 return x 149 } 150 return y 151 }