github.com/vench/word_index@v0.3.1/vector_index.go (about)

     1  package word_index
     2  
     3  import (
     4  	"math"
     5  	"sort"
     6  )
     7  
// vector is a dense float64 vector tagged with a numeric identifier and an
// optional caller-supplied payload.
type vector struct {
	// Id identifies the vector; IndexVector.Fit uses it as the map key.
	Id   uint32
	// V holds the vector components.
	V    []float64
	// Data is an arbitrary payload carried alongside the vector; nothing in
	// this file reads it.
	Data interface{}
}
    13  
    14  func (v *vector) DistCos(a *vector) float64 {
    15  	return distCos(a.V, v.V)
    16  }
    17  
    18  func (v *vector) DistMonteCarlo(a *vector) float64 {
    19  	return 0
    20  }
    21  
    22  func (v *vector) DistEuclidean(a *vector) float64 {
    23  	return distEuclidean(a.V, v.V)
    24  }
    25  
    26  func NewEmptyVector(id uint32, size int) *vector {
    27  	return &vector{
    28  		Id: id,
    29  		V:  make([]float64, size),
    30  	}
    31  }
    32  
    33  func NewVector(id uint32, v []float64, data interface{}) *vector {
    34  	return &vector{
    35  		Id:   id,
    36  		V:    v,
    37  		Data: data,
    38  	}
    39  }
    40  
// indexVectorItem is one entry of an IndexVector.
type indexVectorItem struct {
	// i is the indexed vector.
	i         *vector
	// z is the value ZOrderCurveFloat64 produced for i.V when the index was fitted.
	z         uint64
	// neighbors lists the other indexed items that IndexVector.Fit found
	// within the index's neighborsThreshold of this one.
	neighbors []*indexVectorItem
}
    46  
// IndexVector is a lookup structure over vectors keyed by their z-order value.
// It is populated by Fit and queried with Search and SearchNeighborhood.
type IndexVector struct {
	// itemsMap maps a vector's Id to its index entry.
	itemsMap           map[uint32]*indexVectorItem
	// itemsOrderZ holds every fitted entry, sorted ascending by z.
	itemsOrderZ        []*indexVectorItem
	// neighborsThreshold is the largest Euclidean distance at which Fit
	// records two items as neighbors.
	// NOTE(review): nothing in this file assigns it, so it appears to stay at
	// its zero value — confirm whether a setter exists elsewhere.
	neighborsThreshold float64
}
    52  
    53  func (iv *IndexVector) Fit(list []*vector) error {
    54  	items := make([]*indexVectorItem, len(list))
    55  	itemsMap := make(map[uint32]*indexVectorItem)
    56  	for i, v := range list {
    57  		item := &indexVectorItem{
    58  			i: v,
    59  			z: ZOrderCurveFloat64(v.V),
    60  		}
    61  		items[i] = item
    62  		itemsMap[item.i.Id] = item
    63  	}
    64  	sort.Slice(items, func(i, j int) bool {
    65  		return items[i].z < items[j].z
    66  	})
    67  
    68  	// update neighbors O(N^2)
    69  	for i, v := range itemsMap {
    70  		v.neighbors = make([]*indexVectorItem, 0)
    71  		for j, v1 := range itemsMap {
    72  			if i == j {
    73  				continue
    74  			}
    75  			// TODO set sist type
    76  			if v.i.DistEuclidean(v1.i) <= iv.neighborsThreshold {
    77  				v.neighbors = append(v.neighbors, v1)
    78  			}
    79  		}
    80  	}
    81  
    82  	iv.itemsMap = itemsMap
    83  	iv.itemsOrderZ = items
    84  
    85  	return nil
    86  }
    87  
    88  func (iv *IndexVector) SearchNeighborhood(v []float64, neighborhood []float64) ([]*vector, error) {
    89  	zSearch := ZOrderCurveFloat64(v)
    90  	zNeighborhood := ZOrderCurveFloat64(neighborhood)
    91  	zSearchLow := uint64(0)
    92  	if zSearch > zNeighborhood {
    93  		zSearchLow = zSearch - zNeighborhood
    94  	}
    95  	zSearchHigh := zSearch + zNeighborhood
    96  	low := 0
    97  	high := len(iv.itemsOrderZ) - 1
    98  	for low <= high {
    99  		median := (low + high) / 2
   100  		if iv.itemsOrderZ[median].z < zSearchLow {
   101  			low = median + 1
   102  		} else {
   103  			high = median - 1
   104  		}
   105  	}
   106  	result := make([]*vector, 0)
   107  	for low < len(iv.itemsOrderZ) && iv.itemsOrderZ[low].z <= zSearchHigh {
   108  		//fmt.Println(iv.itemsOrderZ[low].i.Id)
   109  		result = append(result, iv.itemsOrderZ[low].i)
   110  		low++
   111  	}
   112  	return result, nil
   113  }
   114  
   115  func (iv *IndexVector) Search(v []float64) ([]*vector, error) {
   116  	zSearch := ZOrderCurveFloat64(v)
   117  	low := 0
   118  	high := len(iv.itemsOrderZ) - 1
   119  	for low <= high {
   120  		median := (low + high) / 2
   121  		if iv.itemsOrderZ[median].z < zSearch {
   122  			low = median + 1
   123  		} else {
   124  			high = median - 1
   125  		}
   126  	}
   127  	result := make([]*vector, 0)
   128  	for low < len(iv.itemsOrderZ) && iv.itemsOrderZ[low].z <= zSearch {
   129  		//fmt.Println(iv.itemsOrderZ[low].i.Id)
   130  		result = append(result, iv.itemsOrderZ[low].i)
   131  		low++
   132  	}
   133  	return result, nil
   134  }
   135  
   136  func NewIndexVector() (*IndexVector, error) {
   137  	return &IndexVector{}, nil
   138  }
   139  
   140  func ZOrderCurveFloat64(vec []float64) uint64 {
   141  	v := make([]uint64, len(vec))
   142  	for i, x := range vec {
   143  		v[i] = zOrderCurveFloat64ToUint64(x)
   144  	}
   145  	return ZOrderCurve(v)
   146  }
   147  
   148  func zOrderCurveFloat64ToUint64(x float64) uint64 {
   149  	return uint64(x * 1000000)
   150  }
   151  
   152  func ZOrderCurve(vec []uint64) uint64 {
   153  	B := []uint64{0x00000000FFFFFFFF, 0x0000FFFF0000FFFF, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F, 0x3333333333333333, 0x5555555555555555}
   154  	S := []uint64{32, 16, 8, 4, 2, 1}
   155  
   156  	for i := 0; i < len(S); i++ {
   157  		for j := 0; j < len(vec); j++ {
   158  			vec[j] = (vec[j] | (vec[j] << S[i])) & B[i]
   159  		}
   160  	}
   161  	r := uint64(0)
   162  	for i, v := range vec {
   163  		r |= v << i
   164  	}
   165  	return r
   166  }
   167  
   168  func distCos(a, b []float64) float64 {
   169  	if len(a) != len(b) {
   170  		return 0
   171  	}
   172  	as, bs, ab := float64(0), float64(0), float64(0)
   173  	for i := 0; i < len(a); i++ {
   174  		as += a[i] * a[i]
   175  		bs += b[i] * b[i]
   176  		ab += a[i] * b[i]
   177  	}
   178  
   179  	if as == 0 || bs == 0 {
   180  		return 0.0
   181  	}
   182  	return ab / (math.Sqrt(as) * math.Sqrt(bs))
   183  }
   184  
   185  func distEuclidean(a, b []float64) float64 {
   186  	if len(a) != len(b) {
   187  		return 0
   188  	}
   189  	s := float64(0)
   190  	for i := 0; i < len(a); i++ {
   191  		s += math.Pow(a[i]-b[i], 2)
   192  	}
   193  	return math.Sqrt(s)
   194  }