github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/index_cyclecallbacks.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/weaviate/weaviate/entities/cyclemanager"
    19  	"github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    20  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    21  )
    22  
    23  type indexCycleCallbacks struct {
    24  	compactionCallbacks cyclemanager.CycleCallbackGroup
    25  	compactionCycle     cyclemanager.CycleManager
    26  
    27  	flushCallbacks cyclemanager.CycleCallbackGroup
    28  	flushCycle     cyclemanager.CycleManager
    29  
    30  	vectorCommitLoggerCallbacks     cyclemanager.CycleCallbackGroup
    31  	vectorCommitLoggerCycle         cyclemanager.CycleManager
    32  	vectorTombstoneCleanupCallbacks cyclemanager.CycleCallbackGroup
    33  	vectorTombstoneCleanupCycle     cyclemanager.CycleManager
    34  
    35  	geoPropsCommitLoggerCallbacks     cyclemanager.CycleCallbackGroup
    36  	geoPropsCommitLoggerCycle         cyclemanager.CycleManager
    37  	geoPropsTombstoneCleanupCallbacks cyclemanager.CycleCallbackGroup
    38  	geoPropsTombstoneCleanupCycle     cyclemanager.CycleManager
    39  }
    40  
    41  func (index *Index) initCycleCallbacks() {
    42  	vectorTombstoneCleanupIntervalSeconds := hnsw.DefaultCleanupIntervalSeconds
    43  	if hnswUserConfig, ok := index.vectorIndexUserConfig.(hnsw.UserConfig); ok {
    44  		vectorTombstoneCleanupIntervalSeconds = hnswUserConfig.CleanupIntervalSeconds
    45  	}
    46  
    47  	id := func(elems ...string) string {
    48  		elems = append([]string{"index", index.ID()}, elems...)
    49  		return strings.Join(elems, "/")
    50  	}
    51  
    52  	compactionCallbacks := cyclemanager.NewCallbackGroup(id("compaction"), index.logger, _NUMCPU*2)
    53  	compactionCycle := cyclemanager.NewManager(
    54  		cyclemanager.CompactionCycleTicker(),
    55  		compactionCallbacks.CycleCallback, index.logger)
    56  
    57  	flushCallbacks := cyclemanager.NewCallbackGroup(id("flush"), index.logger, _NUMCPU*2)
    58  	flushCycle := cyclemanager.NewManager(
    59  		cyclemanager.MemtableFlushCycleTicker(),
    60  		flushCallbacks.CycleCallback, index.logger)
    61  
    62  	vectorCommitLoggerCallbacks := cyclemanager.NewCallbackGroup(id("vector", "commit_logger"), index.logger, _NUMCPU*2)
    63  	// Previously we had an interval of 10s in here, which was changed to
    64  	// 0.5s as part of gh-1867. There's really no way to wait so long in
    65  	// between checks: If you are running on a low-powered machine, the
    66  	// interval will simply find that there is no work and do nothing in
    67  	// each iteration. However, if you are running on a very powerful
    68  	// machine within 10s you could have potentially created two units of
    69  	// work, but we'll only be handling one every 10s. This means
    70  	// uncombined/uncondensed hnsw commit logs will keep piling up can only
    71  	// be processes long after the initial insert is complete. This also
    72  	// means that if there is a crash during importing a lot of work needs
    73  	// to be done at startup, since the commit logs still contain too many
    74  	// redundancies. So as of now it seems there are only advantages to
    75  	// running the cleanup checks and work much more often.
    76  	//
    77  	// update: switched to dynamic intervals with values between 500ms and 10s
    78  	// introduced to address https://github.com/weaviate/weaviate/issues/2783
    79  	vectorCommitLoggerCycle := cyclemanager.NewManager(
    80  		cyclemanager.HnswCommitLoggerCycleTicker(),
    81  		vectorCommitLoggerCallbacks.CycleCallback, index.logger)
    82  
    83  	vectorTombstoneCleanupCallbacks := cyclemanager.NewCallbackGroup(id("vector", "tombstone_cleanup"), index.logger, _NUMCPU*2)
    84  	vectorTombstoneCleanupCycle := cyclemanager.NewManager(
    85  		cyclemanager.NewFixedTicker(time.Duration(vectorTombstoneCleanupIntervalSeconds)*time.Second),
    86  		vectorTombstoneCleanupCallbacks.CycleCallback, index.logger)
    87  
    88  	geoPropsCommitLoggerCallbacks := cyclemanager.NewCallbackGroup(id("geo_props", "commit_logger"), index.logger, _NUMCPU*2)
    89  	geoPropsCommitLoggerCycle := cyclemanager.NewManager(
    90  		cyclemanager.GeoCommitLoggerCycleTicker(),
    91  		geoPropsCommitLoggerCallbacks.CycleCallback, index.logger)
    92  
    93  	geoPropsTombstoneCleanupCallbacks := cyclemanager.NewCallbackGroup(id("geo_props", "tombstone_cleanup"), index.logger, _NUMCPU*2)
    94  	geoPropsTombstoneCleanupCycle := cyclemanager.NewManager(
    95  		cyclemanager.NewFixedTicker(enthnsw.DefaultCleanupIntervalSeconds*time.Second),
    96  		geoPropsTombstoneCleanupCallbacks.CycleCallback, index.logger)
    97  
    98  	index.cycleCallbacks = &indexCycleCallbacks{
    99  		compactionCallbacks: compactionCallbacks,
   100  		compactionCycle:     compactionCycle,
   101  		flushCallbacks:      flushCallbacks,
   102  		flushCycle:          flushCycle,
   103  
   104  		vectorCommitLoggerCallbacks:     vectorCommitLoggerCallbacks,
   105  		vectorCommitLoggerCycle:         vectorCommitLoggerCycle,
   106  		vectorTombstoneCleanupCallbacks: vectorTombstoneCleanupCallbacks,
   107  		vectorTombstoneCleanupCycle:     vectorTombstoneCleanupCycle,
   108  
   109  		geoPropsCommitLoggerCallbacks:     geoPropsCommitLoggerCallbacks,
   110  		geoPropsCommitLoggerCycle:         geoPropsCommitLoggerCycle,
   111  		geoPropsTombstoneCleanupCallbacks: geoPropsTombstoneCleanupCallbacks,
   112  		geoPropsTombstoneCleanupCycle:     geoPropsTombstoneCleanupCycle,
   113  	}
   114  }
   115  
   116  func (index *Index) initCycleCallbacksNoop() {
   117  	index.cycleCallbacks = &indexCycleCallbacks{
   118  		compactionCallbacks: cyclemanager.NewCallbackGroupNoop(),
   119  		compactionCycle:     cyclemanager.NewManagerNoop(),
   120  		flushCallbacks:      cyclemanager.NewCallbackGroupNoop(),
   121  		flushCycle:          cyclemanager.NewManagerNoop(),
   122  
   123  		vectorCommitLoggerCallbacks:     cyclemanager.NewCallbackGroupNoop(),
   124  		vectorCommitLoggerCycle:         cyclemanager.NewManagerNoop(),
   125  		vectorTombstoneCleanupCallbacks: cyclemanager.NewCallbackGroupNoop(),
   126  		vectorTombstoneCleanupCycle:     cyclemanager.NewManagerNoop(),
   127  
   128  		geoPropsCommitLoggerCallbacks:     cyclemanager.NewCallbackGroupNoop(),
   129  		geoPropsCommitLoggerCycle:         cyclemanager.NewManagerNoop(),
   130  		geoPropsTombstoneCleanupCallbacks: cyclemanager.NewCallbackGroupNoop(),
   131  		geoPropsTombstoneCleanupCycle:     cyclemanager.NewManagerNoop(),
   132  	}
   133  }