github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/commit_logger.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "fmt" 17 "os" 18 "path/filepath" 19 "sort" 20 "strconv" 21 "strings" 22 "sync" 23 "time" 24 25 "github.com/pkg/errors" 26 "github.com/sirupsen/logrus" 27 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/commitlog" 29 "github.com/weaviate/weaviate/entities/cyclemanager" 30 "github.com/weaviate/weaviate/entities/errorcompounder" 31 ) 32 33 const defaultCommitLogSize = 500 * 1024 * 1024 34 35 func commitLogFileName(rootPath, indexName, fileName string) string { 36 return fmt.Sprintf("%s/%s", commitLogDirectory(rootPath, indexName), fileName) 37 } 38 39 func commitLogDirectory(rootPath, name string) string { 40 return fmt.Sprintf("%s/%s.hnsw.commitlog.d", rootPath, name) 41 } 42 43 func NewCommitLogger(rootPath, name string, logger logrus.FieldLogger, 44 maintenanceCallbacks cyclemanager.CycleCallbackGroup, opts ...CommitlogOption, 45 ) (*hnswCommitLogger, error) { 46 l := &hnswCommitLogger{ 47 rootPath: rootPath, 48 id: name, 49 condensor: NewMemoryCondensor(logger), 50 logger: logger, 51 52 // both can be overwritten using functional options 53 maxSizeIndividual: defaultCommitLogSize / 5, 54 maxSizeCombining: defaultCommitLogSize, 55 } 56 57 for _, o := range opts { 58 if err := o(l); err != nil { 59 return nil, err 60 } 61 } 62 63 fd, err := getLatestCommitFileOrCreate(rootPath, name) 64 if err != nil { 65 return nil, err 66 } 67 68 id := func(elems ...string) string { 69 elems = append([]string{"commit_logger"}, elems...) 70 elems = append(elems, l.id) 71 return strings.Join(elems, "/") 72 } 73 l.commitLogger = commitlog.NewLoggerWithFile(fd) 74 l.switchLogsCallbackCtrl = maintenanceCallbacks.Register(id("switch_logs"), l.startSwitchLogs) 75 l.condenseLogsCallbackCtrl = maintenanceCallbacks.Register(id("condense_logs"), l.startCombineAndCondenseLogs) 76 77 return l, nil 78 } 79 80 func getLatestCommitFileOrCreate(rootPath, name string) (*os.File, error) { 81 dir := commitLogDirectory(rootPath, name) 82 err := os.MkdirAll(dir, os.ModePerm) 83 if err != nil { 84 return nil, errors.Wrap(err, "create commit logger directory") 85 } 86 87 fileName, ok, err := getCurrentCommitLogFileName(dir) 88 if err != nil { 89 return nil, errors.Wrap(err, "find commit logger file in directory") 90 } 91 92 if !ok { 93 // this is a new commit log, initialize with the current time stamp 94 fileName = fmt.Sprintf("%d", time.Now().Unix()) 95 } 96 97 fd, err := os.OpenFile(commitLogFileName(rootPath, name, fileName), 98 os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666) 99 if err != nil { 100 return nil, errors.Wrap(err, "create commit log file") 101 } 102 103 return fd, nil 104 } 105 106 // getCommitFileNames in order, from old to new 107 func getCommitFileNames(rootPath, name string) ([]string, error) { 108 dir := commitLogDirectory(rootPath, name) 109 err := os.MkdirAll(dir, os.ModePerm) 110 if err != nil { 111 return nil, errors.Wrap(err, "create commit logger directory") 112 } 113 114 files, err := os.ReadDir(dir) 115 if err != nil { 116 return nil, errors.Wrap(err, "browse commit logger directory") 117 } 118 119 files = removeTmpScratchOrHiddenFiles(files) 120 files, err = removeTmpCombiningFiles(dir, files) 121 if err != nil { 122 return nil, errors.Wrap(err, "remove temporary files") 123 } 124 125 if len(files) == 0 { 126 return nil, nil 127 } 128 129 ec := &errorcompounder.ErrorCompounder{} 130 sort.Slice(files, func(a, b int) bool { 131 ts1, err := asTimeStamp(files[a].Name()) 132 if err != nil { 133 ec.Add(err) 134 } 135 136 ts2, err := asTimeStamp(files[b].Name()) 137 if err != nil { 138 ec.Add(err) 139 } 140 return ts1 < ts2 141 }) 142 if err := ec.ToError(); err != nil { 143 return nil, err 144 } 145 146 out := make([]string, len(files)) 147 for i, file := range files { 148 out[i] = commitLogFileName(rootPath, name, file.Name()) 149 } 150 151 return out, nil 152 } 153 154 // getCurrentCommitLogFileName returns the fileName and true if a file was 155 // present. If no file was present, the second arg is false. 156 func getCurrentCommitLogFileName(dirPath string) (string, bool, error) { 157 files, err := os.ReadDir(dirPath) 158 if err != nil { 159 return "", false, errors.Wrap(err, "browse commit logger directory") 160 } 161 162 if len(files) == 0 { 163 return "", false, nil 164 } 165 166 files = removeTmpScratchOrHiddenFiles(files) 167 files, err = removeTmpCombiningFiles(dirPath, files) 168 if err != nil { 169 return "", false, errors.Wrap(err, "clean up tmp combining files") 170 } 171 172 ec := &errorcompounder.ErrorCompounder{} 173 sort.Slice(files, func(a, b int) bool { 174 ts1, err := asTimeStamp(files[a].Name()) 175 if err != nil { 176 ec.Add(err) 177 } 178 179 ts2, err := asTimeStamp(files[b].Name()) 180 if err != nil { 181 ec.Add(err) 182 } 183 return ts1 > ts2 184 }) 185 if err := ec.ToError(); err != nil { 186 return "", false, err 187 } 188 189 return files[0].Name(), true, nil 190 } 191 192 func removeTmpScratchOrHiddenFiles(in []os.DirEntry) []os.DirEntry { 193 out := make([]os.DirEntry, len(in)) 194 i := 0 195 for _, info := range in { 196 if strings.HasSuffix(info.Name(), ".scratch.tmp") { 197 continue 198 } 199 200 if strings.HasPrefix(info.Name(), ".") { 201 continue 202 } 203 204 out[i] = info 205 i++ 206 } 207 208 return out[:i] 209 } 210 211 func removeTmpCombiningFiles(dirPath string, 212 in []os.DirEntry, 213 ) ([]os.DirEntry, error) { 214 out := make([]os.DirEntry, len(in)) 215 i := 0 216 for _, info := range in { 217 if strings.HasSuffix(info.Name(), ".combined.tmp") { 218 // a temporary combining file was found which means that the combining 219 // process never completed, this file is thus considered corrupt (too 220 // short) and must be deleted. The original sources still exist (because 221 // the only get deleted after the .tmp file is removed), so it's safe to 222 // delete this without data loss. 223 224 if err := os.Remove(filepath.Join(dirPath, info.Name())); err != nil { 225 return out, errors.Wrap(err, "remove tmp combining file") 226 } 227 continue 228 } 229 230 out[i] = info 231 i++ 232 } 233 234 return out[:i], nil 235 } 236 237 func asTimeStamp(in string) (int64, error) { 238 return strconv.ParseInt(strings.TrimSuffix(in, ".condensed"), 10, 64) 239 } 240 241 type condensor interface { 242 Do(filename string) error 243 } 244 245 type hnswCommitLogger struct { 246 // protect against concurrent attempts to write in the underlying file or 247 // buffer 248 sync.Mutex 249 250 rootPath string 251 id string 252 condensor condensor 253 logger logrus.FieldLogger 254 maxSizeIndividual int64 255 maxSizeCombining int64 256 commitLogger *commitlog.Logger 257 258 switchLogsCallbackCtrl cyclemanager.CycleCallbackCtrl 259 condenseLogsCallbackCtrl cyclemanager.CycleCallbackCtrl 260 } 261 262 type HnswCommitType uint8 // 256 options, plenty of room for future extensions 263 264 const ( 265 AddNode HnswCommitType = iota 266 SetEntryPointMaxLevel 267 AddLinkAtLevel 268 ReplaceLinksAtLevel 269 AddTombstone 270 RemoveTombstone 271 ClearLinks 272 DeleteNode 273 ResetIndex 274 ClearLinksAtLevel // added in v1.8.0-rc.1, see https://github.com/weaviate/weaviate/issues/1701 275 AddLinksAtLevel // added in v1.8.0-rc.1, see https://github.com/weaviate/weaviate/issues/1705 276 AddPQ 277 ) 278 279 func (t HnswCommitType) String() string { 280 switch t { 281 case AddNode: 282 return "AddNode" 283 case SetEntryPointMaxLevel: 284 return "SetEntryPointWithMaxLayer" 285 case AddLinkAtLevel: 286 return "AddLinkAtLevel" 287 case AddLinksAtLevel: 288 return "AddLinksAtLevel" 289 case ReplaceLinksAtLevel: 290 return "ReplaceLinksAtLevel" 291 case AddTombstone: 292 return "AddTombstone" 293 case RemoveTombstone: 294 return "RemoveTombstone" 295 case ClearLinks: 296 return "ClearLinks" 297 case DeleteNode: 298 return "DeleteNode" 299 case ResetIndex: 300 return "ResetIndex" 301 case ClearLinksAtLevel: 302 return "ClearLinksAtLevel" 303 case AddPQ: 304 return "AddProductQuantizer" 305 } 306 return "unknown commit type" 307 } 308 309 func (l *hnswCommitLogger) ID() string { 310 return l.id 311 } 312 313 func (l *hnswCommitLogger) AddPQ(data compressionhelpers.PQData) error { 314 l.Lock() 315 defer l.Unlock() 316 317 return l.commitLogger.AddPQ(data) 318 } 319 320 // AddNode adds an empty node 321 func (l *hnswCommitLogger) AddNode(node *vertex) error { 322 l.Lock() 323 defer l.Unlock() 324 325 return l.commitLogger.AddNode(node.id, node.level) 326 } 327 328 func (l *hnswCommitLogger) SetEntryPointWithMaxLayer(id uint64, level int) error { 329 l.Lock() 330 defer l.Unlock() 331 332 return l.commitLogger.SetEntryPointWithMaxLayer(id, level) 333 } 334 335 func (l *hnswCommitLogger) ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error { 336 l.Lock() 337 defer l.Unlock() 338 339 return l.commitLogger.ReplaceLinksAtLevel(nodeid, level, targets) 340 } 341 342 func (l *hnswCommitLogger) AddLinkAtLevel(nodeid uint64, level int, 343 target uint64, 344 ) error { 345 l.Lock() 346 defer l.Unlock() 347 348 return l.commitLogger.AddLinkAtLevel(nodeid, level, target) 349 } 350 351 func (l *hnswCommitLogger) AddTombstone(nodeid uint64) error { 352 l.Lock() 353 defer l.Unlock() 354 355 return l.commitLogger.AddTombstone(nodeid) 356 } 357 358 func (l *hnswCommitLogger) RemoveTombstone(nodeid uint64) error { 359 l.Lock() 360 defer l.Unlock() 361 362 return l.commitLogger.RemoveTombstone(nodeid) 363 } 364 365 func (l *hnswCommitLogger) ClearLinks(nodeid uint64) error { 366 l.Lock() 367 defer l.Unlock() 368 369 return l.commitLogger.ClearLinks(nodeid) 370 } 371 372 func (l *hnswCommitLogger) ClearLinksAtLevel(nodeid uint64, level uint16) error { 373 l.Lock() 374 defer l.Unlock() 375 376 return l.commitLogger.ClearLinksAtLevel(nodeid, level) 377 } 378 379 func (l *hnswCommitLogger) DeleteNode(nodeid uint64) error { 380 l.Lock() 381 defer l.Unlock() 382 383 return l.commitLogger.DeleteNode(nodeid) 384 } 385 386 func (l *hnswCommitLogger) Reset() error { 387 l.Lock() 388 defer l.Unlock() 389 390 return l.commitLogger.Reset() 391 } 392 393 // Shutdown waits for ongoing maintenance processes to stop, then cancels their 394 // scheduling. The caller can be sure that state on disk is immutable after 395 // calling Shutdown(). 396 func (l *hnswCommitLogger) Shutdown(ctx context.Context) error { 397 if err := l.switchLogsCallbackCtrl.Unregister(ctx); err != nil { 398 return errors.Wrap(err, "failed to unregister commitlog switch from maintenance cycle") 399 } 400 if err := l.condenseLogsCallbackCtrl.Unregister(ctx); err != nil { 401 return errors.Wrap(err, "failed to unregister commitlog condense from maintenance cycle") 402 } 403 return nil 404 } 405 406 func (l *hnswCommitLogger) RootPath() string { 407 return l.rootPath 408 } 409 410 func (l *hnswCommitLogger) startSwitchLogs(shouldAbort cyclemanager.ShouldAbortCallback) bool { 411 executed, err := l.switchCommitLogs(false) 412 if err != nil { 413 l.logger.WithError(err). 414 WithField("action", "hnsw_commit_log_maintenance"). 415 Error("hnsw commit log maintenance failed") 416 } 417 return executed 418 } 419 420 func (l *hnswCommitLogger) startCombineAndCondenseLogs(shouldAbort cyclemanager.ShouldAbortCallback) bool { 421 executed1, err := l.combineLogs() 422 if err != nil { 423 l.logger.WithError(err). 424 WithField("action", "hnsw_commit_log_combining"). 425 Error("hnsw commit log maintenance (combining) failed") 426 } 427 428 executed2, err := l.condenseOldLogs() 429 if err != nil { 430 l.logger.WithError(err). 431 WithField("action", "hnsw_commit_log_condensing"). 432 Error("hnsw commit log maintenance (condensing) failed") 433 } 434 return executed1 || executed2 435 } 436 437 func (l *hnswCommitLogger) SwitchCommitLogs(force bool) error { 438 _, err := l.switchCommitLogs(force) 439 return err 440 } 441 442 func (l *hnswCommitLogger) switchCommitLogs(force bool) (bool, error) { 443 l.Lock() 444 defer l.Unlock() 445 446 size, err := l.commitLogger.FileSize() 447 if err != nil { 448 return false, err 449 } 450 451 if size <= l.maxSizeIndividual && !force { 452 return false, nil 453 } 454 455 oldFileName, err := l.commitLogger.FileName() 456 if err != nil { 457 return false, err 458 } 459 460 if err := l.commitLogger.Close(); err != nil { 461 return true, err 462 } 463 464 // this is a new commit log, initialize with the current time stamp 465 fileName := fmt.Sprintf("%d", time.Now().Unix()) 466 467 if force { 468 l.logger.WithField("action", "commit_log_file_switched"). 469 WithField("id", l.id). 470 WithField("old_file_name", oldFileName). 471 WithField("old_file_size", size). 472 WithField("new_file_name", fileName). 473 Debug("commit log switched forced") 474 } else { 475 l.logger.WithField("action", "commit_log_file_switched"). 476 WithField("id", l.id). 477 WithField("old_file_name", oldFileName). 478 WithField("old_file_size", size). 479 WithField("new_file_name", fileName). 480 Info("commit log size crossed threshold, switching to new file") 481 } 482 483 fd, err := os.OpenFile(commitLogFileName(l.rootPath, l.id, fileName), 484 os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666) 485 if err != nil { 486 return true, errors.Wrap(err, "create commit log file") 487 } 488 489 l.commitLogger = commitlog.NewLoggerWithFile(fd) 490 491 return true, nil 492 } 493 494 func (l *hnswCommitLogger) condenseOldLogs() (bool, error) { 495 files, err := getCommitFileNames(l.rootPath, l.id) 496 if err != nil { 497 return false, err 498 } 499 500 if len(files) <= 1 { 501 // if there are no files there is nothing to do 502 // if there is only a single file, it must still be in use, we can't do 503 // anything yet 504 return false, nil 505 } 506 507 // cut off last element, as that's never a candidate 508 candidates := files[:len(files)-1] 509 510 for _, candidate := range candidates { 511 if strings.HasSuffix(candidate, ".condensed") { 512 // don't attempt to condense logs which are already condensed 513 continue 514 } 515 516 return true, l.condensor.Do(candidate) 517 } 518 519 return false, nil 520 } 521 522 func (l *hnswCommitLogger) combineLogs() (bool, error) { 523 // maxSize is the desired final size, since we assume a lot of redundancy we 524 // can set the combining threshold higher than the final threshold under the 525 // assumption that the combined file will be considerably smaller than the 526 // sum of both input files 527 threshold := int64(float64(l.maxSizeCombining) * 1.75) 528 return NewCommitLogCombiner(l.rootPath, l.id, threshold, l.logger).Do() 529 } 530 531 func (l *hnswCommitLogger) Drop(ctx context.Context) error { 532 if err := l.commitLogger.Close(); err != nil { 533 return errors.Wrap(err, "close hnsw commit logger prior to delete") 534 } 535 536 // stop all goroutines 537 if err := l.Shutdown(ctx); err != nil { 538 return errors.Wrap(err, "drop commitlog") 539 } 540 541 // remove commit log directory if exists 542 dir := commitLogDirectory(l.rootPath, l.id) 543 if _, err := os.Stat(dir); err == nil { 544 err := os.RemoveAll(dir) 545 if err != nil { 546 return errors.Wrap(err, "delete commit files directory") 547 } 548 } 549 return nil 550 } 551 552 func (l *hnswCommitLogger) Flush() error { 553 l.Lock() 554 defer l.Unlock() 555 556 return l.commitLogger.Flush() 557 }