//                           _       _
//  __      _____  __ ___   ___  __ _| |_ ___
//  \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//   \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//    \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package lsmkv

import (
	"fmt"
	"math"
	"os"
	"path/filepath"
	"strings"

	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
	"github.com/weaviate/weaviate/entities/cyclemanager"
)

// bestCompactionCandidatePair picks the positions (within sg.segments) of the
// two segments that should be compacted next, or nil if no compaction is
// possible right now. Preference is given to the lowest level that has at
// least two segments; if no level has a pair, the two segments with the
// lowest levels are merged instead — but only when
// sg.compactLeftOverSegments is enabled.
func (sg *SegmentGroup) bestCompactionCandidatePair() []int {
	sg.maintenanceLock.RLock()
	defer sg.maintenanceLock.RUnlock()

	// if true, the parent shard has indicated that it has
	// entered an immutable state. During this time, the
	// SegmentGroup should refrain from flushing until its
	// shard indicates otherwise
	if sg.isReadyOnly() {
		return nil
	}

	// Nothing to compact
	if len(sg.segments) < 2 {
		return nil
	}

	// first determine the lowest level with candidates
	levels := map[uint16]int{}
	lowestPairLevel := uint16(math.MaxUint16)
	lowestLevel := uint16(math.MaxUint16)
	lowestIndex := -1
	secondLowestIndex := -1
	pairExists := false

	for ind, seg := range sg.segments {
		levels[seg.level]++
		val := levels[seg.level]
		if val > 1 {
			// at least two segments share this level; remember the lowest
			// such level
			if seg.level < lowestPairLevel {
				lowestPairLevel = seg.level
				pairExists = true
			}
		}

		// NOTE(review): secondLowestIndex is only updated when a new minimum
		// is found, so it holds the index of the *previous* minimum — which
		// is not necessarily the segment with the second-lowest level
		// overall. Confirm this is the intended leftover-merge heuristic.
		if seg.level < lowestLevel {
			secondLowestIndex = lowestIndex
			lowestLevel = seg.level
			lowestIndex = ind
		}
	}

	if pairExists {
		// now pick any two segments which match the level
		var res []int

		for i, segment := range sg.segments {
			if len(res) >= 2 {
				break
			}

			if segment.level == lowestPairLevel {
				res = append(res, i)
			}
		}

		return res
	} else {
		if sg.compactLeftOverSegments {
			// Some segments exist, but none are of the same level
			// Merge the two lowest segments

			return []int{secondLowestIndex, lowestIndex}
		} else {
			// No segments of the same level exist, and we are not allowed to merge the lowest segments
			// This means we cannot compact. Set COMPACT_LEFTOVER_SEGMENTS to true to compact the remaining segments
			return nil
		}
	}
}

// segmentAtPos retrieves the segment for the given position using a read-lock
func (sg *SegmentGroup) segmentAtPos(pos int) *segment {
	sg.maintenanceLock.RLock()
	defer sg.maintenanceLock.RUnlock()

	return sg.segments[pos]
}

// segmentID extracts the bare id from a segment file path, i.e. it strips the
// "segment-" prefix and the ".db" suffix from the basename.
func segmentID(path string) string {
	filename := filepath.Base(path)
	return strings.TrimSuffix(strings.TrimPrefix(filename, "segment-"), ".db")
}

// compactOnce performs at most one compaction: it picks the best candidate
// pair, merges the two segments into a new ".db.tmp" file using the
// strategy-specific compactor, fsyncs it, and atomically swaps it in via
// replaceCompactedSegments. It returns (true, nil) if a compaction happened
// and (false, nil) if there was nothing to do.
func (sg *SegmentGroup) compactOnce() (bool, error) {
	// Is it safe to only occasionally lock instead of the entire duration? Yes,
	// because other than compaction the only change to the segments array could
	// be an append because of a new flush cycle, so we do not need to guarantee
	// that the array contents stay stable over the duration of an entire
	// compaction. We do however need to protect against a read-while-write (race
	// condition) on the array. Thus any read from sg.segments needs to be
	// protected.
	pair := sg.bestCompactionCandidatePair()
	if pair == nil {
		// nothing to do
		return false, nil
	}

	leftSegment := sg.segmentAtPos(pair[0])
	rightSegment := sg.segmentAtPos(pair[1])

	// the new segment carries both source ids joined by "_" until the swap in
	// replaceCompactedSegments renames it to just the right segment's id
	path := filepath.Join(sg.dir, "segment-"+segmentID(leftSegment.path)+"_"+segmentID(rightSegment.path)+".db.tmp")

	f, err := os.Create(path)
	if err != nil {
		return false, err
	}

	scratchSpacePath := rightSegment.path + "compaction.scratch.d"

	// the assumption is that the first element is older, and/or a higher level
	level := leftSegment.level
	secondaryIndices := leftSegment.secondaryIndexCount

	// merging two segments of the same level produces one of the next level
	if level == rightSegment.level {
		level = level + 1
	}

	strategy := leftSegment.strategy
	// tombstones may only be dropped when the left segment is the oldest one
	// (position 0) — otherwise an even older segment could still contain the
	// keys the tombstones refer to
	cleanupTombstones := !sg.keepTombstones && pair[0] == 0

	pathLabel := "n/a"
	if sg.metrics != nil && !sg.metrics.groupClasses {
		pathLabel = sg.dir
	}
	switch strategy {

	// TODO: call metrics just once with variable strategy label

	case segmentindex.StrategyReplace:
		c := newCompactorReplace(f, leftSegment.newCursor(),
			rightSegment.newCursor(), level, secondaryIndices, scratchSpacePath, cleanupTombstones)

		if sg.metrics != nil {
			sg.metrics.CompactionReplace.With(prometheus.Labels{"path": pathLabel}).Inc()
			defer sg.metrics.CompactionReplace.With(prometheus.Labels{"path": pathLabel}).Dec()
		}

		if err := c.do(); err != nil {
			return false, err
		}
	case segmentindex.StrategySetCollection:
		c := newCompactorSetCollection(f, leftSegment.newCollectionCursor(),
			rightSegment.newCollectionCursor(), level, secondaryIndices,
			scratchSpacePath, cleanupTombstones)

		if sg.metrics != nil {
			sg.metrics.CompactionSet.With(prometheus.Labels{"path": pathLabel}).Inc()
			defer sg.metrics.CompactionSet.With(prometheus.Labels{"path": pathLabel}).Dec()
		}

		if err := c.do(); err != nil {
			return false, err
		}
	case segmentindex.StrategyMapCollection:
		c := newCompactorMapCollection(f,
			leftSegment.newCollectionCursorReusable(),
			rightSegment.newCollectionCursorReusable(),
			level, secondaryIndices, scratchSpacePath, sg.mapRequiresSorting, cleanupTombstones)

		if sg.metrics != nil {
			sg.metrics.CompactionMap.With(prometheus.Labels{"path": pathLabel}).Inc()
			defer sg.metrics.CompactionMap.With(prometheus.Labels{"path": pathLabel}).Dec()
		}

		if err := c.do(); err != nil {
			return false, err
		}
	case segmentindex.StrategyRoaringSet:
		leftCursor := leftSegment.newRoaringSetCursor()
		rightCursor := rightSegment.newRoaringSetCursor()

		c := roaringset.NewCompactor(f, leftCursor, rightCursor,
			level, scratchSpacePath, cleanupTombstones)

		// NOTE(review): this strategy uses Set(1)/Set(0) where the others use
		// Inc()/Dec() — inconsistent if the metric is meant to count
		// concurrent compactions; see the TODO above the switch
		if sg.metrics != nil {
			sg.metrics.CompactionRoaringSet.With(prometheus.Labels{"path": pathLabel}).Set(1)
			defer sg.metrics.CompactionRoaringSet.With(prometheus.Labels{"path": pathLabel}).Set(0)
		}

		if err := c.Do(); err != nil {
			return false, err
		}

	default:
		return false, errors.Errorf("unrecognized strategy %v", strategy)
	}

	if err := f.Sync(); err != nil {
		return false, errors.Wrap(err, "fsync compacted segment file")
	}

	if err := f.Close(); err != nil {
		return false, errors.Wrap(err, "close compacted segment file")
	}

	if err := sg.replaceCompactedSegments(pair[0], pair[1], path); err != nil {
		return false, errors.Wrap(err, "replace compacted segments")
	}

	return true, nil
}

// replaceCompactedSegments swaps the freshly compacted segment (still carrying
// the .tmp extension) in for the two source segments at positions old1/old2:
// it precomputes the new segment's metadata files outside the write-lock, then
// under the write-lock closes and drops both old segments, renames the
// precomputed files into place, initializes the new segment at old2's
// position, and removes old1's slot from the slice.
func (sg *SegmentGroup) replaceCompactedSegments(old1, old2 int,
	newPathTmp string,
) error {
	// the combined net additions are needed before the old segments are
	// dropped; only a read-lock is required for this
	sg.maintenanceLock.RLock()
	updatedCountNetAdditions := sg.segments[old1].countNetAdditions +
		sg.segments[old2].countNetAdditions
	sg.maintenanceLock.RUnlock()

	// precompute bloom filter / count files while not holding the write-lock,
	// to keep the exclusive section as short as possible
	precomputedFiles, err := preComputeSegmentMeta(newPathTmp,
		updatedCountNetAdditions, sg.logger,
		sg.useBloomFilter, sg.calcCountNetAdditions)
	if err != nil {
		return fmt.Errorf("precompute segment meta: %w", err)
	}

	sg.maintenanceLock.Lock()
	defer sg.maintenanceLock.Unlock()

	leftSegment := sg.segments[old1]
	rightSegment := sg.segments[old2]

	if err := leftSegment.close(); err != nil {
		return errors.Wrap(err, "close disk segment")
	}

	if err := rightSegment.close(); err != nil {
		return errors.Wrap(err, "close disk segment")
	}

	if err := leftSegment.drop(); err != nil {
		return errors.Wrap(err, "drop disk segment")
	}

	if err := rightSegment.drop(); err != nil {
		return errors.Wrap(err, "drop disk segment")
	}

	sg.segments[old1] = nil
	sg.segments[old2] = nil

	var newPath string
	// the old segments have been deleted, we can now safely remove the .tmp
	// extension from the new segment itself and the pre-computed files which
	// carried the name of the second old segment
	for i, path := range precomputedFiles {
		updated, err := sg.stripTmpExtension(path, segmentID(leftSegment.path), segmentID(rightSegment.path))
		if err != nil {
			return errors.Wrap(err, "strip .tmp extension of new segment")
		}

		if i == 0 {
			// the first element in the list is the segment itself
			newPath = updated
		}
	}

	seg, err := newSegment(newPath, sg.logger, sg.metrics, nil,
		sg.mmapContents, sg.useBloomFilter, sg.calcCountNetAdditions, false)
	if err != nil {
		return errors.Wrap(err, "create new segment")
	}

	// the new segment takes the right (newer) segment's position; the left
	// slot is then spliced out of the slice
	sg.segments[old2] = seg

	sg.segments = append(sg.segments[:old1], sg.segments[old1+1:]...)

	return nil
}

// stripTmpExtension removes the ".tmp" extension from oldPath and also
// replaces the combined "<left>_<right>" id in the filename with just the
// right id, then renames the file on disk. It returns the final path.
func (sg *SegmentGroup) stripTmpExtension(oldPath, left, right string) (string, error) {
	ext := filepath.Ext(oldPath)
	if ext != ".tmp" {
		return "", errors.Errorf("segment %q did not have .tmp extension", oldPath)
	}
	newPath := oldPath[:len(oldPath)-len(ext)]

	// the compacted file was named after both source segments; the final name
	// keeps only the right (newer) segment's id
	newPath = strings.ReplaceAll(newPath, fmt.Sprintf("%s_%s", left, right), right)

	if err := os.Rename(oldPath, newPath); err != nil {
		return "", errors.Wrapf(err, "rename %q -> %q", oldPath, newPath)
	}

	return newPath, nil
}

// compactIfLevelsMatch reports segment metrics and runs a single compaction
// attempt. It returns true if a compaction was performed. Errors are logged,
// not returned, so the surrounding cycle keeps running.
//
// NOTE(review): the shouldAbort callback is not used in this body —
// presumably the signature is dictated by cyclemanager; confirm.
func (sg *SegmentGroup) compactIfLevelsMatch(shouldAbort cyclemanager.ShouldAbortCallback) bool {
	sg.monitorSegments()

	compacted, err := sg.compactOnce()
	if err != nil {
		sg.logger.WithField("action", "lsm_compaction").
			WithField("path", sg.dir).
			WithError(err).
			Errorf("compaction failed")
	}

	if compacted {
		return true
	} else {
		sg.logger.WithField("action", "lsm_compaction").
			WithField("path", sg.dir).
			Trace("no segment eligible for compaction")
		return false
	}
}

// Len returns the current number of segments using a read-lock.
func (sg *SegmentGroup) Len() int {
	sg.maintenanceLock.RLock()
	defer sg.maintenanceLock.RUnlock()

	return len(sg.segments)
}

// monitorSegments publishes the active-segment gauge and per-level
// size/count stats. It is a no-op when metrics are disabled or grouped by
// class (in which case per-path labels would not be meaningful).
func (sg *SegmentGroup) monitorSegments() {
	if sg.metrics == nil || sg.metrics.groupClasses {
		return
	}

	sg.metrics.ActiveSegments.With(prometheus.Labels{
		"strategy": sg.strategy,
		"path":     sg.dir,
	}).Set(float64(sg.Len()))

	stats := sg.segmentLevelStats()
	stats.fillMissingLevels()
	stats.report(sg.metrics, sg.strategy, sg.dir)
}

// segmentLevelStats aggregates per-level totals across all segments of a
// group.
type segmentLevelStats struct {
	indexes  map[uint16]int // total index size (bytes) per level
	payloads map[uint16]int // total payload size (bytes) per level
	count    map[uint16]int // number of segments per level
}

// newSegmentLevelStats returns an empty stats container with all maps
// initialized.
func newSegmentLevelStats() segmentLevelStats {
	return segmentLevelStats{
		indexes:  map[uint16]int{},
		payloads: map[uint16]int{},
		count:    map[uint16]int{},
	}
}

// segmentLevelStats collects per-level counts and index/payload sizes over
// all current segments under a read-lock.
func (sg *SegmentGroup) segmentLevelStats() segmentLevelStats {
	sg.maintenanceLock.RLock()
	defer sg.maintenanceLock.RUnlock()

	stats := newSegmentLevelStats()

	for _, seg := range sg.segments {
		stats.count[seg.level]++

		cur := stats.indexes[seg.level]
		cur += seg.index.Size()
		stats.indexes[seg.level] = cur

		cur = stats.payloads[seg.level]
		cur += seg.PayloadSize()
		stats.payloads[seg.level] = cur
	}

	return stats
}

// fill missing levels
//
// Imagine we had exactly two segments of level 4 before, and they were just
// compacted to a single segment of level 5. As a result, there should be no
// more segments of level 4. However, our current logic only loops over
// existing segments. As a result, we need to check what the highest level
// is, then for every level lower than the highest check if we are missing
// data. If yes, we need to explicitly set the gauges to 0.
401 func (s *segmentLevelStats) fillMissingLevels() { 402 maxLevel := uint16(0) 403 for level := range s.count { 404 if level > maxLevel { 405 maxLevel = level 406 } 407 } 408 409 if maxLevel > 0 { 410 for level := uint16(0); level < maxLevel; level++ { 411 if _, ok := s.count[level]; ok { 412 continue 413 } 414 415 // there is no entry for this level, we must explicitly set it to 0 416 s.count[level] = 0 417 s.indexes[level] = 0 418 s.payloads[level] = 0 419 } 420 } 421 } 422 423 func (s *segmentLevelStats) report(metrics *Metrics, 424 strategy, dir string, 425 ) { 426 for level, size := range s.indexes { 427 metrics.SegmentSize.With(prometheus.Labels{ 428 "strategy": strategy, 429 "unit": "index", 430 "level": fmt.Sprint(level), 431 "path": dir, 432 }).Set(float64(size)) 433 } 434 435 for level, size := range s.payloads { 436 metrics.SegmentSize.With(prometheus.Labels{ 437 "strategy": strategy, 438 "unit": "payload", 439 "level": fmt.Sprint(level), 440 "path": dir, 441 }).Set(float64(size)) 442 } 443 444 for level, count := range s.count { 445 metrics.SegmentCount.With(prometheus.Labels{ 446 "strategy": strategy, 447 "level": fmt.Sprint(level), 448 "path": dir, 449 }).Set(float64(count)) 450 } 451 }