github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_group_compaction.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"fmt"
    16  	"math"
    17  	"os"
    18  	"path/filepath"
    19  	"strings"
    20  
    21  	"github.com/pkg/errors"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    25  	"github.com/weaviate/weaviate/entities/cyclemanager"
    26  )
    27  
    28  func (sg *SegmentGroup) bestCompactionCandidatePair() []int {
    29  	sg.maintenanceLock.RLock()
    30  	defer sg.maintenanceLock.RUnlock()
    31  
    32  	// if true, the parent shard has indicated that it has
    33  	// entered an immutable state. During this time, the
    34  	// SegmentGroup should refrain from flushing until its
    35  	// shard indicates otherwise
    36  	if sg.isReadyOnly() {
    37  		return nil
    38  	}
    39  
    40  	// Nothing to compact
    41  	if len(sg.segments) < 2 {
    42  		return nil
    43  	}
    44  
    45  	// first determine the lowest level with candidates
    46  	levels := map[uint16]int{}
    47  	lowestPairLevel := uint16(math.MaxUint16)
    48  	lowestLevel := uint16(math.MaxUint16)
    49  	lowestIndex := -1
    50  	secondLowestIndex := -1
    51  	pairExists := false
    52  
    53  	for ind, seg := range sg.segments {
    54  		levels[seg.level]++
    55  		val := levels[seg.level]
    56  		if val > 1 {
    57  			if seg.level < lowestPairLevel {
    58  				lowestPairLevel = seg.level
    59  				pairExists = true
    60  			}
    61  		}
    62  
    63  		if seg.level < lowestLevel {
    64  			secondLowestIndex = lowestIndex
    65  			lowestLevel = seg.level
    66  			lowestIndex = ind
    67  		}
    68  	}
    69  
    70  	if pairExists {
    71  		// now pick any two segments which match the level
    72  		var res []int
    73  
    74  		for i, segment := range sg.segments {
    75  			if len(res) >= 2 {
    76  				break
    77  			}
    78  
    79  			if segment.level == lowestPairLevel {
    80  				res = append(res, i)
    81  			}
    82  		}
    83  
    84  		return res
    85  	} else {
    86  		if sg.compactLeftOverSegments {
    87  			// Some segments exist, but none are of the same level
    88  			// Merge the two lowest segments
    89  
    90  			return []int{secondLowestIndex, lowestIndex}
    91  		} else {
    92  			// No segments of the same level exist, and we are not allowed to merge the lowest segments
    93  			// This means we cannot compact.  Set COMPACT_LEFTOVER_SEGMENTS to true to compact the remaining segments
    94  			return nil
    95  		}
    96  	}
    97  }
    98  
    99  // segmentAtPos retrieves the segment for the given position using a read-lock
   100  func (sg *SegmentGroup) segmentAtPos(pos int) *segment {
   101  	sg.maintenanceLock.RLock()
   102  	defer sg.maintenanceLock.RUnlock()
   103  
   104  	return sg.segments[pos]
   105  }
   106  
   107  func segmentID(path string) string {
   108  	filename := filepath.Base(path)
   109  	return strings.TrimSuffix(strings.TrimPrefix(filename, "segment-"), ".db")
   110  }
   111  
   112  func (sg *SegmentGroup) compactOnce() (bool, error) {
   113  	// Is it safe to only occasionally lock instead of the entire duration? Yes,
   114  	// because other than compaction the only change to the segments array could
   115  	// be an append because of a new flush cycle, so we do not need to guarantee
   116  	// that the array contents stay stable over the duration of an entire
   117  	// compaction. We do however need to protect against a read-while-write (race
   118  	// condition) on the array. Thus any read from sg.segments need to protected
   119  	pair := sg.bestCompactionCandidatePair()
   120  	if pair == nil {
   121  		// nothing to do
   122  		return false, nil
   123  	}
   124  
   125  	leftSegment := sg.segmentAtPos(pair[0])
   126  	rightSegment := sg.segmentAtPos(pair[1])
   127  
   128  	path := filepath.Join(sg.dir, "segment-"+segmentID(leftSegment.path)+"_"+segmentID(rightSegment.path)+".db.tmp")
   129  
   130  	f, err := os.Create(path)
   131  	if err != nil {
   132  		return false, err
   133  	}
   134  
   135  	scratchSpacePath := rightSegment.path + "compaction.scratch.d"
   136  
   137  	// the assumption is that the first element is older, and/or a higher level
   138  	level := leftSegment.level
   139  	secondaryIndices := leftSegment.secondaryIndexCount
   140  
   141  	if level == rightSegment.level {
   142  		level = level + 1
   143  	}
   144  
   145  	strategy := leftSegment.strategy
   146  	cleanupTombstones := !sg.keepTombstones && pair[0] == 0
   147  
   148  	pathLabel := "n/a"
   149  	if sg.metrics != nil && !sg.metrics.groupClasses {
   150  		pathLabel = sg.dir
   151  	}
   152  	switch strategy {
   153  
   154  	// TODO: call metrics just once with variable strategy label
   155  
   156  	case segmentindex.StrategyReplace:
   157  		c := newCompactorReplace(f, leftSegment.newCursor(),
   158  			rightSegment.newCursor(), level, secondaryIndices, scratchSpacePath, cleanupTombstones)
   159  
   160  		if sg.metrics != nil {
   161  			sg.metrics.CompactionReplace.With(prometheus.Labels{"path": pathLabel}).Inc()
   162  			defer sg.metrics.CompactionReplace.With(prometheus.Labels{"path": pathLabel}).Dec()
   163  		}
   164  
   165  		if err := c.do(); err != nil {
   166  			return false, err
   167  		}
   168  	case segmentindex.StrategySetCollection:
   169  		c := newCompactorSetCollection(f, leftSegment.newCollectionCursor(),
   170  			rightSegment.newCollectionCursor(), level, secondaryIndices,
   171  			scratchSpacePath, cleanupTombstones)
   172  
   173  		if sg.metrics != nil {
   174  			sg.metrics.CompactionSet.With(prometheus.Labels{"path": pathLabel}).Inc()
   175  			defer sg.metrics.CompactionSet.With(prometheus.Labels{"path": pathLabel}).Dec()
   176  		}
   177  
   178  		if err := c.do(); err != nil {
   179  			return false, err
   180  		}
   181  	case segmentindex.StrategyMapCollection:
   182  		c := newCompactorMapCollection(f,
   183  			leftSegment.newCollectionCursorReusable(),
   184  			rightSegment.newCollectionCursorReusable(),
   185  			level, secondaryIndices, scratchSpacePath, sg.mapRequiresSorting, cleanupTombstones)
   186  
   187  		if sg.metrics != nil {
   188  			sg.metrics.CompactionMap.With(prometheus.Labels{"path": pathLabel}).Inc()
   189  			defer sg.metrics.CompactionMap.With(prometheus.Labels{"path": pathLabel}).Dec()
   190  		}
   191  
   192  		if err := c.do(); err != nil {
   193  			return false, err
   194  		}
   195  	case segmentindex.StrategyRoaringSet:
   196  		leftCursor := leftSegment.newRoaringSetCursor()
   197  		rightCursor := rightSegment.newRoaringSetCursor()
   198  
   199  		c := roaringset.NewCompactor(f, leftCursor, rightCursor,
   200  			level, scratchSpacePath, cleanupTombstones)
   201  
   202  		if sg.metrics != nil {
   203  			sg.metrics.CompactionRoaringSet.With(prometheus.Labels{"path": pathLabel}).Set(1)
   204  			defer sg.metrics.CompactionRoaringSet.With(prometheus.Labels{"path": pathLabel}).Set(0)
   205  		}
   206  
   207  		if err := c.Do(); err != nil {
   208  			return false, err
   209  		}
   210  
   211  	default:
   212  		return false, errors.Errorf("unrecognized strategy %v", strategy)
   213  	}
   214  
   215  	if err := f.Sync(); err != nil {
   216  		return false, errors.Wrap(err, "fsync compacted segment file")
   217  	}
   218  
   219  	if err := f.Close(); err != nil {
   220  		return false, errors.Wrap(err, "close compacted segment file")
   221  	}
   222  
   223  	if err := sg.replaceCompactedSegments(pair[0], pair[1], path); err != nil {
   224  		return false, errors.Wrap(err, "replace compacted segments")
   225  	}
   226  
   227  	return true, nil
   228  }
   229  
// replaceCompactedSegments swaps the two source segments at positions old1 and
// old2 for the freshly compacted segment located at newPathTmp (which still
// carries a .tmp extension). The new segment takes over the array position of
// the second (newer) source segment, and the array shrinks by one.
//
// Expensive pre-computation (bloom filters, net-addition counts) happens
// before the exclusive maintenance lock is taken, to keep the critical
// section as short as possible.
func (sg *SegmentGroup) replaceCompactedSegments(old1, old2 int,
	newPathTmp string,
) error {
	// read the two source counts under the read-lock only; the segments array
	// cannot shrink concurrently (only this method removes entries), so the
	// positions stay valid after unlocking
	sg.maintenanceLock.RLock()
	updatedCountNetAdditions := sg.segments[old1].countNetAdditions +
		sg.segments[old2].countNetAdditions
	sg.maintenanceLock.RUnlock()

	precomputedFiles, err := preComputeSegmentMeta(newPathTmp,
		updatedCountNetAdditions, sg.logger,
		sg.useBloomFilter, sg.calcCountNetAdditions)
	if err != nil {
		return fmt.Errorf("precompute segment meta: %w", err)
	}

	// exclusive lock for the actual swap: close/drop the old segments, rename
	// the new files into place, and splice the segments array
	sg.maintenanceLock.Lock()
	defer sg.maintenanceLock.Unlock()

	leftSegment := sg.segments[old1]
	rightSegment := sg.segments[old2]

	if err := leftSegment.close(); err != nil {
		return errors.Wrap(err, "close disk segment")
	}

	if err := rightSegment.close(); err != nil {
		return errors.Wrap(err, "close disk segment")
	}

	if err := leftSegment.drop(); err != nil {
		return errors.Wrap(err, "drop disk segment")
	}

	if err := rightSegment.drop(); err != nil {
		return errors.Wrap(err, "drop disk segment")
	}

	// clear both slots so a failure below cannot leave dangling pointers to
	// the already-dropped segments
	sg.segments[old1] = nil
	sg.segments[old2] = nil

	var newPath string
	// the old segments have been deleted, we can now safely remove the .tmp
	// extension from the new segment itself and the pre-computed files which
	// carried the name of the second old segment
	for i, path := range precomputedFiles {
		updated, err := sg.stripTmpExtension(path, segmentID(leftSegment.path), segmentID(rightSegment.path))
		if err != nil {
			return errors.Wrap(err, "strip .tmp extension of new segment")
		}

		if i == 0 {
			// the first element in the list is the segment itself
			newPath = updated
		}
	}

	seg, err := newSegment(newPath, sg.logger, sg.metrics, nil,
		sg.mmapContents, sg.useBloomFilter, sg.calcCountNetAdditions, false)
	if err != nil {
		return errors.Wrap(err, "create new segment")
	}

	// the new segment replaces the second source segment in place ...
	sg.segments[old2] = seg

	// ... and the first source's slot is removed from the array entirely
	sg.segments = append(sg.segments[:old1], sg.segments[old1+1:]...)

	return nil
}
   298  
   299  func (sg *SegmentGroup) stripTmpExtension(oldPath, left, right string) (string, error) {
   300  	ext := filepath.Ext(oldPath)
   301  	if ext != ".tmp" {
   302  		return "", errors.Errorf("segment %q did not have .tmp extension", oldPath)
   303  	}
   304  	newPath := oldPath[:len(oldPath)-len(ext)]
   305  
   306  	newPath = strings.ReplaceAll(newPath, fmt.Sprintf("%s_%s", left, right), right)
   307  
   308  	if err := os.Rename(oldPath, newPath); err != nil {
   309  		return "", errors.Wrapf(err, "rename %q -> %q", oldPath, newPath)
   310  	}
   311  
   312  	return newPath, nil
   313  }
   314  
   315  func (sg *SegmentGroup) compactIfLevelsMatch(shouldAbort cyclemanager.ShouldAbortCallback) bool {
   316  	sg.monitorSegments()
   317  
   318  	compacted, err := sg.compactOnce()
   319  	if err != nil {
   320  		sg.logger.WithField("action", "lsm_compaction").
   321  			WithField("path", sg.dir).
   322  			WithError(err).
   323  			Errorf("compaction failed")
   324  	}
   325  
   326  	if compacted {
   327  		return true
   328  	} else {
   329  		sg.logger.WithField("action", "lsm_compaction").
   330  			WithField("path", sg.dir).
   331  			Trace("no segment eligible for compaction")
   332  		return false
   333  	}
   334  }
   335  
   336  func (sg *SegmentGroup) Len() int {
   337  	sg.maintenanceLock.RLock()
   338  	defer sg.maintenanceLock.RUnlock()
   339  
   340  	return len(sg.segments)
   341  }
   342  
   343  func (sg *SegmentGroup) monitorSegments() {
   344  	if sg.metrics == nil || sg.metrics.groupClasses {
   345  		return
   346  	}
   347  
   348  	sg.metrics.ActiveSegments.With(prometheus.Labels{
   349  		"strategy": sg.strategy,
   350  		"path":     sg.dir,
   351  	}).Set(float64(sg.Len()))
   352  
   353  	stats := sg.segmentLevelStats()
   354  	stats.fillMissingLevels()
   355  	stats.report(sg.metrics, sg.strategy, sg.dir)
   356  }
   357  
   358  type segmentLevelStats struct {
   359  	indexes  map[uint16]int
   360  	payloads map[uint16]int
   361  	count    map[uint16]int
   362  }
   363  
   364  func newSegmentLevelStats() segmentLevelStats {
   365  	return segmentLevelStats{
   366  		indexes:  map[uint16]int{},
   367  		payloads: map[uint16]int{},
   368  		count:    map[uint16]int{},
   369  	}
   370  }
   371  
   372  func (sg *SegmentGroup) segmentLevelStats() segmentLevelStats {
   373  	sg.maintenanceLock.RLock()
   374  	defer sg.maintenanceLock.RUnlock()
   375  
   376  	stats := newSegmentLevelStats()
   377  
   378  	for _, seg := range sg.segments {
   379  		stats.count[seg.level]++
   380  
   381  		cur := stats.indexes[seg.level]
   382  		cur += seg.index.Size()
   383  		stats.indexes[seg.level] = cur
   384  
   385  		cur = stats.payloads[seg.level]
   386  		cur += seg.PayloadSize()
   387  		stats.payloads[seg.level] = cur
   388  	}
   389  
   390  	return stats
   391  }
   392  
   393  // fill missing levels
   394  //
   395  // Imagine we had exactly two segments of level 4 before, and there were just
   396  // compacted to single segment of level 5. As a result, there should be no
   397  // more segments of level 4. However, our current logic only loops over
   398  // existing segments. As a result, we need to check what the highest level
   399  // is, then for every level lower than the highest check if we are missing
   400  // data. If yes, we need to explicitly set the gauges to 0.
   401  func (s *segmentLevelStats) fillMissingLevels() {
   402  	maxLevel := uint16(0)
   403  	for level := range s.count {
   404  		if level > maxLevel {
   405  			maxLevel = level
   406  		}
   407  	}
   408  
   409  	if maxLevel > 0 {
   410  		for level := uint16(0); level < maxLevel; level++ {
   411  			if _, ok := s.count[level]; ok {
   412  				continue
   413  			}
   414  
   415  			// there is no entry for this level, we must explicitly set it to 0
   416  			s.count[level] = 0
   417  			s.indexes[level] = 0
   418  			s.payloads[level] = 0
   419  		}
   420  	}
   421  }
   422  
   423  func (s *segmentLevelStats) report(metrics *Metrics,
   424  	strategy, dir string,
   425  ) {
   426  	for level, size := range s.indexes {
   427  		metrics.SegmentSize.With(prometheus.Labels{
   428  			"strategy": strategy,
   429  			"unit":     "index",
   430  			"level":    fmt.Sprint(level),
   431  			"path":     dir,
   432  		}).Set(float64(size))
   433  	}
   434  
   435  	for level, size := range s.payloads {
   436  		metrics.SegmentSize.With(prometheus.Labels{
   437  			"strategy": strategy,
   438  			"unit":     "payload",
   439  			"level":    fmt.Sprint(level),
   440  			"path":     dir,
   441  		}).Set(float64(size))
   442  	}
   443  
   444  	for level, count := range s.count {
   445  		metrics.SegmentCount.With(prometheus.Labels{
   446  			"strategy": strategy,
   447  			"level":    fmt.Sprint(level),
   448  			"path":     dir,
   449  		}).Set(float64(count))
   450  	}
   451  }