// Vector index built on a Z-order (Morton) curve over float64 vectors.
// Listing origin: github.com/vench/word_index@v0.3.1/vector_index.go
// This block holds the declarations only; the file header is
// `package word_index` with imports "math" and "sort".

// vector is an identified point in float64 space with an optional payload.
type vector struct {
	Id   uint32      // identifier; IndexVector.Fit uses it as the map key
	V    []float64   // coordinates
	Data interface{} // arbitrary payload stored alongside the coordinates
}

// DistCos returns the cosine of the angle between v and a (a similarity, not
// a metric distance). It returns 0 when the two vectors differ in length or
// when either of them has zero norm.
func (v *vector) DistCos(a *vector) float64 {
	return distCos(a.V, v.V)
}

// DistMonteCarlo is a placeholder: it ignores its argument and always
// returns 0.
func (v *vector) DistMonteCarlo(a *vector) float64 {
	return 0
}

// DistEuclidean returns the Euclidean distance between v and a. It returns 0
// when the two vectors differ in length.
func (v *vector) DistEuclidean(a *vector) float64 {
	return distEuclidean(a.V, v.V)
}

// NewEmptyVector returns a vector with the given id and size zero-valued
// coordinates.
func NewEmptyVector(id uint32, size int) *vector {
	return &vector{
		Id: id,
		V:  make([]float64, size),
	}
}

// NewVector returns a vector wrapping v (the slice is stored, not copied)
// together with its id and payload.
func NewVector(id uint32, v []float64, data interface{}) *vector {
	return &vector{
		Id:   id,
		V:    v,
		Data: data,
	}
}

// indexVectorItem is one indexed vector together with its Z-order key and the
// neighbors computed by IndexVector.Fit.
type indexVectorItem struct {
	i         *vector
	z         uint64
	neighbors []*indexVectorItem
}

// IndexVector indexes vectors by their Z-order key so that key ranges can be
// located with a binary search.
type IndexVector struct {
	itemsMap    map[uint32]*indexVectorItem // items keyed by vector Id
	itemsOrderZ []*indexVectorItem          // items sorted by ascending z
	// neighborsThreshold is the largest Euclidean distance at which two
	// vectors count as neighbors in Fit.
	// NOTE(review): nothing in this listing assigns it, so it stays 0.
	neighborsThreshold float64
}

// Fit replaces the contents of the index with list.
//
// Every vector gets a Z-order key and the items are sorted by that key;
// vectors with equal keys keep their input order. Neighbor lists are then
// rebuilt by comparing every pair of distinct ids, which is O(N^2). When
// several vectors share an Id, only the last one is kept in the id map,
// although all of them remain searchable by key.
//
// Fit currently always returns nil. Entries of list must be non-nil.
func (iv *IndexVector) Fit(list []*vector) error {
	items := make([]*indexVectorItem, len(list))
	itemsMap := make(map[uint32]*indexVectorItem, len(list))
	for i, v := range list {
		item := &indexVectorItem{
			i: v,
			z: ZOrderCurveFloat64(v.V),
		}
		items[i] = item
		itemsMap[v.Id] = item
	}
	sort.SliceStable(items, func(a, b int) bool {
		return items[a].z < items[b].z
	})

	// Update neighbors, O(N^2).
	// TODO: make the distance function configurable.
	for id, item := range itemsMap {
		item.neighbors = make([]*indexVectorItem, 0)
		for otherID, other := range itemsMap {
			if id == otherID {
				continue
			}
			if item.i.DistEuclidean(other.i) <= iv.neighborsThreshold {
				item.neighbors = append(item.neighbors, other)
			}
		}
	}

	iv.itemsMap = itemsMap
	iv.itemsOrderZ = items

	return nil
}

// scanZRange returns, in ascending key order, the vectors whose Z-order key
// lies in the closed interval [lo, hi]. The result is never nil.
func (iv *IndexVector) scanZRange(lo, hi uint64) []*vector {
	items := iv.itemsOrderZ
	// First position whose key is not below lo.
	start := sort.Search(len(items), func(i int) bool {
		return items[i].z >= lo
	})
	result := make([]*vector, 0)
	for _, item := range items[start:] {
		if item.z > hi {
			break
		}
		result = append(result, item.i)
	}
	return result
}

// SearchNeighborhood returns the vectors whose Z-order key lies within
// ZOrderCurveFloat64(neighborhood) of ZOrderCurveFloat64(v). The window is
// measured on keys, not on geometric distance. Both window bounds saturate at
// the limits of uint64 instead of wrapping around.
//
// The returned error is currently always nil.
func (iv *IndexVector) SearchNeighborhood(v []float64, neighborhood []float64) ([]*vector, error) {
	zSearch := ZOrderCurveFloat64(v)
	zNeighborhood := ZOrderCurveFloat64(neighborhood)

	zSearchLow := uint64(0)
	if zSearch > zNeighborhood {
		zSearchLow = zSearch - zNeighborhood
	}

	zSearchHigh := zSearch + zNeighborhood
	if zSearchHigh < zSearch {
		// The sum wrapped around; without clamping the window would end
		// below the query key and miss the query point itself.
		zSearchHigh = math.MaxUint64
	}

	return iv.scanZRange(zSearchLow, zSearchHigh), nil
}

// Search returns the vectors whose Z-order key equals that of v.
//
// The returned error is currently always nil.
func (iv *IndexVector) Search(v []float64) ([]*vector, error) {
	zSearch := ZOrderCurveFloat64(v)
	return iv.scanZRange(zSearch, zSearch), nil
}

// NewIndexVector returns an empty index. The returned error is currently
// always nil.
func NewIndexVector() (*IndexVector, error) {
	return &IndexVector{}, nil
}

// ZOrderCurveFloat64 converts every component of vec to a fixed-point integer
// (see zOrderCurveFloat64ToUint64) and returns the Z-order key of the result.
func ZOrderCurveFloat64(vec []float64) uint64 {
	v := make([]uint64, len(vec))
	for i, x := range vec {
		v[i] = zOrderCurveFloat64ToUint64(x)
	}
	return ZOrderCurve(v)
}

// zOrderCurveFloat64ToUint64 scales x by 10^6 and truncates it to an integer.
//
// NOTE(review): per the Go specification, converting a float that uint64
// cannot represent (negative, NaN, too large) yields an
// implementation-dependent value, so such inputs may produce different keys
// on different platforms.
func zOrderCurveFloat64ToUint64(x float64) uint64 {
	return uint64(x * 1000000)
}

// ZOrderCurve interleaves the components of vec into a single key.
//
// Only the low 32 bits of each component are used. Each component is spread
// so that its bits occupy every other position, then component i is shifted
// left by i and OR-ed into the result. Because the spreading leaves a single
// gap bit, the result is a proper Morton code for at most two components;
// bits of further components overlap those of earlier ones.
//
// vec is not modified.
func ZOrderCurve(vec []uint64) uint64 {
	masks := [...]uint64{
		0x00000000FFFFFFFF,
		0x0000FFFF0000FFFF,
		0x00FF00FF00FF00FF,
		0x0F0F0F0F0F0F0F0F,
		0x3333333333333333,
		0x5555555555555555,
	}
	shifts := [...]uint{32, 16, 8, 4, 2, 1}

	r := uint64(0)
	for i, x := range vec {
		// x is a copy of the element, so the caller's slice stays intact.
		for k, s := range shifts {
			x = (x | (x << s)) & masks[k]
		}
		r |= x << uint(i)
	}
	return r
}

// distCos returns the cosine of the angle between a and b, or 0 when the
// lengths differ or either vector has zero norm.
func distCos(a, b []float64) float64 {
	if len(a) != len(b) {
		return 0
	}
	as, bs, ab := float64(0), float64(0), float64(0)
	for i := range a {
		as += a[i] * a[i]
		bs += b[i] * b[i]
		ab += a[i] * b[i]
	}

	if as == 0 || bs == 0 {
		return 0.0
	}
	return ab / (math.Sqrt(as) * math.Sqrt(bs))
}

// distEuclidean returns the Euclidean distance between a and b.
//
// NOTE(review): a length mismatch yields 0, which is indistinguishable from
// "identical vectors"; callers comparing against a threshold should be aware.
func distEuclidean(a, b []float64) float64 {
	if len(a) != len(b) {
		return 0
	}
	s := float64(0)
	for i := range a {
		d := a[i] - b[i]
		s += d * d
	}
	return math.Sqrt(s)
}