github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/geo/geo.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package geo
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  
    18  	"github.com/pkg/errors"
    19  	"github.com/sirupsen/logrus"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    23  	"github.com/weaviate/weaviate/entities/cyclemanager"
    24  	"github.com/weaviate/weaviate/entities/filters"
    25  	"github.com/weaviate/weaviate/entities/models"
    26  	hnswent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    27  )
    28  
// Index wraps another index to provide geo searches. This allows us to reuse
// the hnsw vector index, without making geo searches dependent on
// hnsw-specific features.
//
// In the future we could use this level of abstraction to provide a better
// suited geo-index if we deem it necessary.
//
// Fields:
//   - config is the Config the index was created with.
//   - vectorIndex is the wrapped index that every operation is delegated to.
//     Drop resets it to nil once the underlying index was dropped
//     successfully, so the Index must not be used after a successful Drop.
type Index struct {
	config      Config
	vectorIndex vectorIndex
}
    39  
// vectorIndex represents the underlying vector index, typically hnsw. It
// lists only the operations the geo Index needs, which keeps the wrapper
// independent of the concrete index implementation.
type vectorIndex interface {
	// Add is called by Index.Add with the vector derived from the
	// coordinates.
	Add(id uint64, vector []float32) error
	// KnnSearchByVectorMaxDist is called by Index.WithinRange with the query
	// vector, the range's distance, a fixed ef and a nil allow list.
	KnnSearchByVectorMaxDist(query []float32, dist float32, ef int,
		allowList helpers.AllowList) ([]uint64, error)
	// Delete is called by Index.Delete with a single id.
	Delete(id ...uint64) error
	// Dump is not called anywhere in this file.
	// NOTE(review): presumably a debugging aid - confirm against the
	// implementation.
	Dump(...string)
	// Drop is called by Index.Drop.
	Drop(ctx context.Context) error
	// PostStartup is called by Index.PostStartup.
	PostStartup()
}
    50  
// Config is passed to the geo Index when it is created.
//
// Fields:
//   - ID is forwarded as the ID of the underlying hnsw index and of its
//     commit logger.
//   - CoordinatesForID provides the VectorForID method that the underlying
//     hnsw index receives as its VectorForIDThunk.
//   - DisablePersistence, when true, makes the underlying index use
//     hnsw.MakeNoopCommitLogger instead of a real commit logger.
//   - RootPath is forwarded to the underlying hnsw index and to its commit
//     logger.
//   - Logger is handed to the commit logger; it is only used when
//     persistence is enabled.
type Config struct {
	ID                 string
	CoordinatesForID   CoordinatesForID
	DisablePersistence bool
	RootPath           string
	Logger             logrus.FieldLogger
}
    59  
    60  func NewIndex(config Config,
    61  	commitLogMaintenanceCallbacks, tombstoneCleanupCallbacks,
    62  	compactionCallbacks, flushCallbacks cyclemanager.CycleCallbackGroup,
    63  ) (*Index, error) {
    64  	vi, err := hnsw.New(hnsw.Config{
    65  		VectorForIDThunk:      config.CoordinatesForID.VectorForID,
    66  		ID:                    config.ID,
    67  		RootPath:              config.RootPath,
    68  		MakeCommitLoggerThunk: makeCommitLoggerFromConfig(config, commitLogMaintenanceCallbacks),
    69  		DistanceProvider:      distancer.NewGeoProvider(),
    70  	}, hnswent.UserConfig{
    71  		MaxConnections:         64,
    72  		EFConstruction:         128,
    73  		CleanupIntervalSeconds: hnswent.DefaultCleanupIntervalSeconds,
    74  	}, tombstoneCleanupCallbacks, compactionCallbacks, flushCallbacks, nil)
    75  	if err != nil {
    76  		return nil, errors.Wrap(err, "underlying hnsw index")
    77  	}
    78  
    79  	i := &Index{
    80  		config:      config,
    81  		vectorIndex: vi,
    82  	}
    83  
    84  	return i, nil
    85  }
    86  
    87  func (i *Index) Drop(ctx context.Context) error {
    88  	if err := i.vectorIndex.Drop(ctx); err != nil {
    89  		return err
    90  	}
    91  
    92  	i.vectorIndex = nil
    93  	return nil
    94  }
    95  
    96  func (i *Index) PostStartup() {
    97  	i.vectorIndex.PostStartup()
    98  }
    99  
   100  func makeCommitLoggerFromConfig(config Config, maintenanceCallbacks cyclemanager.CycleCallbackGroup,
   101  ) hnsw.MakeCommitLogger {
   102  	makeCL := hnsw.MakeNoopCommitLogger
   103  	if !config.DisablePersistence {
   104  		makeCL = func() (hnsw.CommitLogger, error) {
   105  			return hnsw.NewCommitLogger(config.RootPath, config.ID, config.Logger, maintenanceCallbacks)
   106  		}
   107  	}
   108  	return makeCL
   109  }
   110  
   111  // Add extends the index with the specified GeoCoordinates. It is thread-safe
   112  // and can be called concurrently.
   113  func (i *Index) Add(id uint64, coordinates *models.GeoCoordinates) error {
   114  	v, err := geoCoordiantesToVector(coordinates)
   115  	if err != nil {
   116  		return errors.Wrap(err, "invalid arguments")
   117  	}
   118  
   119  	return i.vectorIndex.Add(id, v)
   120  }
   121  
   122  // WithinGeoRange searches the index by the specified range. It is thread-safe
   123  // and can be called concurrently.
   124  func (i *Index) WithinRange(ctx context.Context,
   125  	geoRange filters.GeoRange,
   126  ) ([]uint64, error) {
   127  	if geoRange.GeoCoordinates == nil {
   128  		return nil, fmt.Errorf("invalid arguments: GeoCoordinates in range must be set")
   129  	}
   130  
   131  	query, err := geoCoordiantesToVector(geoRange.GeoCoordinates)
   132  	if err != nil {
   133  		return nil, errors.Wrap(err, "invalid arguments")
   134  	}
   135  
   136  	return i.vectorIndex.KnnSearchByVectorMaxDist(query, geoRange.Distance, 800, nil)
   137  }
   138  
   139  func (i *Index) Delete(id uint64) error {
   140  	return i.vectorIndex.Delete(id)
   141  }