/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"bytes"
	"context"
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"os"
	"path"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/willf/bloom"

	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/color"
	"storj.io/minio/pkg/console"
)

const (
	// Estimate bloom filter size. With this many items
	dataUpdateTrackerEstItems = 10000000
	// ... we want this false positive rate:
	// NOTE(review): 0.99 is an unusually high false-positive rate; presumably
	// it is chosen to keep each filter small. Confirm before changing it.
	dataUpdateTrackerFP = 0.99
	// Capacity of the tracker's input channel; 0 makes it unbuffered.
	dataUpdateTrackerQueueSize = 0

	// Location of the persisted tracker, joined onto each drive path.
	dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin"
	// Format version written as the first byte of the persisted tracker.
	dataUpdateTrackerVersion = 4
	// Interval handed to the saver goroutine when the tracker is started.
	dataUpdateTrackerSaveInterval = 5 * time.Minute
)

var (
	// objectUpdatedCh is the send-only view of the package tracker's input channel.
	objectUpdatedCh chan<- string
	// intDataUpdateTracker is the package-level tracker created in init.
	intDataUpdateTracker *dataUpdateTracker
)

// init creates the package-level tracker and exposes its input channel
// through objectUpdatedCh.
func init() {
	intDataUpdateTracker = newDataUpdateTracker()
	objectUpdatedCh = intDataUpdateTracker.input
}

// dataUpdateTracker records which paths have been updated in a bloom filter
// for the current index and keeps the filters of earlier indexes as history.
type dataUpdateTracker struct {
	// mu is locked around most reads and writes of Current, History, Saved and dirty.
	mu sync.Mutex
	// input receives updated paths for the collector goroutine.
	input chan string
	// save is signalled to request a save outside the regular interval.
	save chan struct{}
	// debug enables console debug output.
	debug bool
	// saveExited is closed by the saver goroutine when it returns.
	saveExited chan struct{}
	// dirty is true when there are changes that have not been saved yet.
	dirty bool

	// Current is the filter that is currently being written to.
	Current dataUpdateFilter
	// History holds the filters of previous indexes.
	History dataUpdateTrackerHistory
	// Saved is the timestamp set by the saver right before serializing.
	Saved time.Time
}

// newDataUpdateTracker returns a dataUpdateTracker with default settings.
77 func newDataUpdateTracker() *dataUpdateTracker { 78 d := &dataUpdateTracker{ 79 Current: dataUpdateFilter{ 80 idx: 1, 81 }, 82 debug: serverDebugLog, 83 input: make(chan string, dataUpdateTrackerQueueSize), 84 save: make(chan struct{}, 1), 85 saveExited: make(chan struct{}), 86 } 87 d.Current.bf = d.newBloomFilter() 88 d.dirty = true 89 return d 90 } 91 92 type dataUpdateTrackerHistory []dataUpdateFilter 93 94 type dataUpdateFilter struct { 95 idx uint64 96 bf bloomFilter 97 } 98 99 type bloomFilter struct { 100 *bloom.BloomFilter 101 } 102 103 // emptyBloomFilter returns an empty bloom filter. 104 func emptyBloomFilter() bloomFilter { 105 return bloomFilter{BloomFilter: &bloom.BloomFilter{}} 106 } 107 108 // containsDir returns whether the bloom filter contains a directory. 109 // Note that objects in XL mode are also considered directories. 110 func (b bloomFilter) containsDir(in string) bool { 111 split := splitPathDeterministic(path.Clean(in)) 112 113 if len(split) == 0 { 114 return false 115 } 116 return b.TestString(hashPath(path.Join(split...)).String()) 117 } 118 119 // bytes returns the bloom filter serialized as a byte slice. 120 func (b *bloomFilter) bytes() []byte { 121 if b == nil || b.BloomFilter == nil { 122 return nil 123 } 124 var buf bytes.Buffer 125 _, err := b.WriteTo(&buf) 126 if err != nil { 127 logger.LogIf(GlobalContext, err) 128 return nil 129 } 130 return buf.Bytes() 131 } 132 133 // sort the dataUpdateTrackerHistory, newest first. 134 // Returns whether the history is complete. 135 func (d dataUpdateTrackerHistory) sort() bool { 136 if len(d) == 0 { 137 return true 138 } 139 sort.Slice(d, func(i, j int) bool { 140 return d[i].idx > d[j].idx 141 }) 142 return d[0].idx-d[len(d)-1].idx == uint64(len(d)) 143 } 144 145 // removeOlderThan will remove entries older than index 'n'. 
146 func (d *dataUpdateTrackerHistory) removeOlderThan(n uint64) { 147 d.sort() 148 dd := *d 149 end := len(dd) 150 for i := end - 1; i >= 0; i-- { 151 if dd[i].idx < n { 152 end = i 153 } 154 } 155 dd = dd[:end] 156 *d = dd 157 } 158 159 // newBloomFilter returns a new bloom filter with default settings. 160 func (d *dataUpdateTracker) newBloomFilter() bloomFilter { 161 return bloomFilter{bloom.NewWithEstimates(dataUpdateTrackerEstItems, dataUpdateTrackerFP)} 162 } 163 164 // current returns the current index. 165 func (d *dataUpdateTracker) current() uint64 { 166 d.mu.Lock() 167 defer d.mu.Unlock() 168 return d.Current.idx 169 } 170 171 // latestWithDir returns the highest index that contains the directory. 172 // This means that any cycle higher than this does NOT contain the entry. 173 func (d *dataUpdateTracker) latestWithDir(dir string) uint64 { 174 dateUpdateTrackerLogPrefix := color.Green("dataUpdateTracker:") 175 bucket, _ := path2BucketObjectWithBasePath("", dir) 176 if bucket == "" { 177 if d.debug && len(dir) > 0 { 178 console.Debugf(dateUpdateTrackerLogPrefix+" no bucket (%s)\n", dir) 179 } 180 return d.current() 181 } 182 if isReservedOrInvalidBucket(bucket, false) { 183 if d.debug { 184 console.Debugf(dateUpdateTrackerLogPrefix+" isReservedOrInvalidBucket: %v, entry: %v\n", bucket, dir) 185 } 186 return d.current() 187 } 188 189 d.mu.Lock() 190 defer d.mu.Unlock() 191 if d.Current.bf.containsDir(dir) || d.Current.idx == 0 { 192 return d.Current.idx 193 } 194 if d.debug { 195 console.Debugf(dateUpdateTrackerLogPrefix+" current bloom does NOT contains dir %s\n", dir) 196 } 197 198 idx := d.Current.idx - 1 199 for { 200 f := d.History.find(idx) 201 if f == nil || f.bf.containsDir(dir) || idx == 0 { 202 break 203 } 204 idx-- 205 } 206 return idx 207 } 208 209 // start will load the current data from the drives start collecting information and 210 // start a saver goroutine. 211 // All of these will exit when the context is canceled. 
212 func (d *dataUpdateTracker) start(ctx context.Context, drives ...string) { 213 if len(drives) <= 0 { 214 logger.LogIf(ctx, errors.New("dataUpdateTracker.start: No drives specified")) 215 return 216 } 217 d.load(ctx, drives...) 218 go d.startCollector(ctx) 219 // startSaver will unlock. 220 d.mu.Lock() 221 go d.startSaver(ctx, dataUpdateTrackerSaveInterval, drives) 222 } 223 224 // load will attempt to load data tracking information from the supplied drives. 225 // The data will only be loaded if d.Saved is older than the one found on disk. 226 // The newest working cache will be kept in d. 227 // If no valid data usage tracker can be found d will remain unchanged. 228 // If object is shared the caller should lock it. 229 func (d *dataUpdateTracker) load(ctx context.Context, drives ...string) { 230 if len(drives) <= 0 { 231 logger.LogIf(ctx, errors.New("dataUpdateTracker.load: No drives specified")) 232 return 233 } 234 for _, drive := range drives { 235 236 cacheFormatPath := pathJoin(drive, dataUpdateTrackerFilename) 237 f, err := os.Open(cacheFormatPath) 238 if err != nil { 239 if osIsNotExist(err) { 240 continue 241 } 242 logger.LogIf(ctx, err) 243 continue 244 } 245 err = d.deserialize(f, d.Saved) 246 if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { 247 logger.LogIf(ctx, err) 248 } 249 f.Close() 250 } 251 } 252 253 // startSaver will start a saver that will write d to all supplied drives at specific intervals. 254 // 'd' must be write locked when started and will be unlocked. 255 // The saver will save and exit when supplied context is closed. 
256 func (d *dataUpdateTracker) startSaver(ctx context.Context, interval time.Duration, drives []string) { 257 saveNow := d.save 258 exited := make(chan struct{}) 259 d.saveExited = exited 260 d.mu.Unlock() 261 t := time.NewTicker(interval) 262 defer t.Stop() 263 defer close(exited) 264 var buf bytes.Buffer 265 for { 266 var exit bool 267 select { 268 case <-ctx.Done(): 269 exit = true 270 case <-t.C: 271 case <-saveNow: 272 } 273 buf.Reset() 274 d.mu.Lock() 275 if !d.dirty { 276 d.mu.Unlock() 277 if exit { 278 return 279 } 280 continue 281 } 282 d.Saved = UTCNow() 283 err := d.serialize(&buf) 284 if d.debug { 285 console.Debugf(color.Green("dataUpdateTracker:")+" Saving: %v bytes, Current idx: %v\n", buf.Len(), d.Current.idx) 286 } 287 d.dirty = false 288 d.mu.Unlock() 289 if err != nil { 290 logger.LogIf(ctx, err, "Error serializing usage tracker data") 291 if exit { 292 return 293 } 294 continue 295 } 296 if buf.Len() == 0 { 297 logger.LogIf(ctx, errors.New("zero sized output, skipping save")) 298 continue 299 } 300 for _, drive := range drives { 301 cacheFormatPath := pathJoin(drive, dataUpdateTrackerFilename) 302 err := ioutil.WriteFile(cacheFormatPath, buf.Bytes(), os.ModePerm) 303 if err != nil { 304 if osIsNotExist(err) { 305 continue 306 } 307 logger.LogIf(ctx, err) 308 continue 309 } 310 } 311 if exit { 312 return 313 } 314 } 315 } 316 317 // serialize all data in d to dst. 318 // Caller should hold lock if d is expected to be shared. 319 // If an error is returned, there will likely be partial data written to dst. 320 func (d *dataUpdateTracker) serialize(dst io.Writer) (err error) { 321 ctx := GlobalContext 322 var tmp [8]byte 323 o := bufio.NewWriter(dst) 324 defer func() { 325 if err == nil { 326 err = o.Flush() 327 } 328 }() 329 330 // Version 331 if err := o.WriteByte(dataUpdateTrackerVersion); err != nil { 332 if d.debug { 333 logger.LogIf(ctx, err) 334 } 335 return err 336 } 337 // Timestamp. 
338 binary.LittleEndian.PutUint64(tmp[:], uint64(d.Saved.Unix())) 339 if _, err := o.Write(tmp[:]); err != nil { 340 if d.debug { 341 logger.LogIf(ctx, err) 342 } 343 return err 344 } 345 346 // Current 347 binary.LittleEndian.PutUint64(tmp[:], d.Current.idx) 348 if _, err := o.Write(tmp[:]); err != nil { 349 if d.debug { 350 logger.LogIf(ctx, err) 351 } 352 return err 353 } 354 355 if _, err := d.Current.bf.WriteTo(o); err != nil { 356 if d.debug { 357 logger.LogIf(ctx, err) 358 } 359 return err 360 } 361 362 // History 363 binary.LittleEndian.PutUint64(tmp[:], uint64(len(d.History))) 364 if _, err := o.Write(tmp[:]); err != nil { 365 if d.debug { 366 logger.LogIf(ctx, err) 367 } 368 return err 369 } 370 371 for _, bf := range d.History { 372 // Current 373 binary.LittleEndian.PutUint64(tmp[:], bf.idx) 374 if _, err := o.Write(tmp[:]); err != nil { 375 if d.debug { 376 logger.LogIf(ctx, err) 377 } 378 return err 379 } 380 381 if _, err := bf.bf.WriteTo(o); err != nil { 382 if d.debug { 383 logger.LogIf(ctx, err) 384 } 385 return err 386 } 387 } 388 return nil 389 } 390 391 // deserialize will deserialize the supplied input if the input is newer than the supplied time. 392 func (d *dataUpdateTracker) deserialize(src io.Reader, newerThan time.Time) error { 393 ctx := GlobalContext 394 var dst dataUpdateTracker 395 var tmp [8]byte 396 397 // Version 398 if _, err := io.ReadFull(src, tmp[:1]); err != nil { 399 if d.debug { 400 if err != io.EOF { 401 logger.LogIf(ctx, err) 402 } 403 } 404 return err 405 } 406 switch tmp[0] { 407 case 1, 2, 3: 408 console.Println(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.") 409 return nil 410 case dataUpdateTrackerVersion: 411 default: 412 return errors.New("dataUpdateTracker: Unknown data version") 413 } 414 // Timestamp. 
415 if _, err := io.ReadFull(src, tmp[:8]); err != nil { 416 if d.debug { 417 logger.LogIf(ctx, err) 418 } 419 return err 420 } 421 t := time.Unix(int64(binary.LittleEndian.Uint64(tmp[:])), 0) 422 if !t.After(newerThan) { 423 return nil 424 } 425 426 // Current 427 if _, err := io.ReadFull(src, tmp[:8]); err != nil { 428 if d.debug { 429 logger.LogIf(ctx, err) 430 } 431 return err 432 } 433 dst.Current.idx = binary.LittleEndian.Uint64(tmp[:]) 434 dst.Current.bf = emptyBloomFilter() 435 if _, err := dst.Current.bf.ReadFrom(src); err != nil { 436 if d.debug { 437 logger.LogIf(ctx, err) 438 } 439 return err 440 } 441 442 // History 443 if _, err := io.ReadFull(src, tmp[:8]); err != nil { 444 if d.debug { 445 logger.LogIf(ctx, err) 446 } 447 return err 448 } 449 n := binary.LittleEndian.Uint64(tmp[:]) 450 dst.History = make(dataUpdateTrackerHistory, int(n)) 451 for i, e := range dst.History { 452 if _, err := io.ReadFull(src, tmp[:8]); err != nil { 453 if d.debug { 454 logger.LogIf(ctx, err) 455 } 456 return err 457 } 458 e.idx = binary.LittleEndian.Uint64(tmp[:]) 459 e.bf = emptyBloomFilter() 460 if _, err := e.bf.ReadFrom(src); err != nil { 461 if d.debug { 462 logger.LogIf(ctx, err) 463 } 464 return err 465 } 466 dst.History[i] = e 467 } 468 // Ignore what remains on the stream. 469 // Update d: 470 d.mu.Lock() 471 defer d.mu.Unlock() 472 d.Current = dst.Current 473 d.History = dst.History 474 d.Saved = dst.Saved 475 return nil 476 } 477 478 // start a collector that picks up entries from objectUpdatedCh 479 // and adds them to the current bloom filter. 
480 func (d *dataUpdateTracker) startCollector(ctx context.Context) { 481 for in := range d.input { 482 bucket, _ := path2BucketObjectWithBasePath("", in) 483 if bucket == "" { 484 if d.debug && len(in) > 0 { 485 console.Debugf(color.Green("dataUpdateTracker:")+" no bucket (%s)\n", in) 486 } 487 continue 488 } 489 490 if isReservedOrInvalidBucket(bucket, false) { 491 if d.debug { 492 console.Debugf(color.Green("dataUpdateTracker:")+" isReservedOrInvalidBucket: %v, entry: %v\n", bucket, in) 493 } 494 continue 495 } 496 split := splitPathDeterministic(in) 497 498 // Add all paths until done. 499 d.mu.Lock() 500 for i := range split { 501 if d.debug { 502 console.Debugln(color.Green("dataUpdateTracker:") + " Marking path dirty: " + color.Blue(path.Join(split[:i+1]...))) 503 } 504 d.Current.bf.AddString(hashPath(path.Join(split[:i+1]...)).String()) 505 } 506 d.dirty = d.dirty || len(split) > 0 507 d.mu.Unlock() 508 } 509 } 510 511 // markDirty adds the supplied path to the current bloom filter. 512 func (d *dataUpdateTracker) markDirty(in string) { 513 bucket, _ := path2BucketObjectWithBasePath("", in) 514 dateUpdateTrackerLogPrefix := color.Green("dataUpdateTracker:") 515 if bucket == "" { 516 if d.debug && len(in) > 0 { 517 console.Debugf(dateUpdateTrackerLogPrefix+" no bucket (%s)\n", in) 518 } 519 return 520 } 521 522 if isReservedOrInvalidBucket(bucket, false) { 523 if d.debug && false { 524 console.Debugf(dateUpdateTrackerLogPrefix+" isReservedOrInvalidBucket: %v, entry: %v\n", bucket, in) 525 } 526 return 527 } 528 split := splitPathDeterministic(in) 529 530 // Add all paths until done. 531 d.mu.Lock() 532 for i := range split { 533 if d.debug { 534 console.Debugln(dateUpdateTrackerLogPrefix + " Marking path dirty: " + color.Blue(path.Join(split[:i+1]...))) 535 } 536 d.Current.bf.AddString(hashPath(path.Join(split[:i+1]...)).String()) 537 } 538 d.dirty = d.dirty || len(split) > 0 539 d.mu.Unlock() 540 } 541 542 // find entry with specified index. 
543 // Returns nil if not found. 544 func (d dataUpdateTrackerHistory) find(idx uint64) *dataUpdateFilter { 545 for _, f := range d { 546 if f.idx == idx { 547 return &f 548 } 549 } 550 return nil 551 } 552 553 // filterFrom will return a combined bloom filter. 554 func (d *dataUpdateTracker) filterFrom(ctx context.Context, oldest, newest uint64) *bloomFilterResponse { 555 bf := d.newBloomFilter() 556 bfr := bloomFilterResponse{ 557 OldestIdx: oldest, 558 CurrentIdx: d.Current.idx, 559 Complete: true, 560 } 561 // Loop through each index requested. 562 for idx := oldest; idx <= newest; idx++ { 563 v := d.History.find(idx) 564 if v == nil { 565 if d.Current.idx == idx { 566 // Merge current. 567 err := bf.Merge(d.Current.bf.BloomFilter) 568 logger.LogIf(ctx, err) 569 if err != nil { 570 bfr.Complete = false 571 } 572 continue 573 } 574 bfr.Complete = false 575 bfr.OldestIdx = idx + 1 576 continue 577 } 578 579 err := bf.Merge(v.bf.BloomFilter) 580 if err != nil { 581 bfr.Complete = false 582 logger.LogIf(ctx, err) 583 continue 584 } 585 bfr.NewestIdx = idx 586 } 587 var dst bytes.Buffer 588 _, err := bf.WriteTo(&dst) 589 if err != nil { 590 logger.LogIf(ctx, err) 591 return nil 592 } 593 bfr.Filter = dst.Bytes() 594 595 return &bfr 596 } 597 598 // cycleFilter will cycle the bloom filter to start recording to index y if not already. 599 // The response will contain a bloom filter starting at index x up to, but not including index y. 600 // If y is 0, the response will not update y, but return the currently recorded information 601 // from the oldest (unless 0, then it will be all) until and including current y. 
602 func (d *dataUpdateTracker) cycleFilter(ctx context.Context, req bloomFilterRequest) (*bloomFilterResponse, error) { 603 if req.OldestClean != "" { 604 return &bloomFilterResponse{OldestIdx: d.latestWithDir(req.OldestClean)}, nil 605 } 606 current := req.Current 607 oldest := req.Oldest 608 d.mu.Lock() 609 defer d.mu.Unlock() 610 if current == 0 { 611 if len(d.History) == 0 { 612 return d.filterFrom(ctx, d.Current.idx, d.Current.idx), nil 613 } 614 d.History.sort() 615 if oldest == 0 { 616 oldest = d.History[len(d.History)-1].idx 617 } 618 return d.filterFrom(ctx, oldest, d.Current.idx), nil 619 } 620 621 // Move current to history if new one requested 622 if d.Current.idx != current { 623 d.dirty = true 624 if d.debug { 625 console.Debugf(color.Green("dataUpdateTracker:")+" cycle bloom filter: %v -> %v\n", d.Current.idx, current) 626 } 627 628 d.History = append(d.History, d.Current) 629 d.Current.idx = current 630 d.Current.bf = d.newBloomFilter() 631 select { 632 case d.save <- struct{}{}: 633 default: 634 } 635 } 636 d.History.removeOlderThan(oldest) 637 return d.filterFrom(ctx, oldest, current), nil 638 } 639 640 // splitPathDeterministic will split the provided relative path 641 // deterministically and return up to the first 3 elements of the path. 642 // slash and dot prefixes are removed. 643 // Trailing slashes are removed. 644 // Returns 0 length if no parts are found after trimming. 645 func splitPathDeterministic(in string) []string { 646 split := strings.Split(decodeDirObject(in), SlashSeparator) 647 648 // Trim empty start/end 649 for len(split) > 0 { 650 if len(split[0]) > 0 && split[0] != "." { 651 break 652 } 653 split = split[1:] 654 } 655 for len(split) > 0 { 656 if len(split[len(split)-1]) > 0 { 657 break 658 } 659 split = split[:len(split)-1] 660 } 661 662 return split 663 } 664 665 // bloomFilterRequest request bloom filters. 666 // Current index will be updated to current and entries back to Oldest is returned. 
type bloomFilterRequest struct {
	// Oldest index to include in the response.
	// If both Oldest and Current are 0, the oldest history entry is used when there is history.
	Oldest uint64
	// Index to record to from now on. If 0, the current index is left as it is.
	Current uint64
	// If set the oldest clean version will be returned in OldestIdx
	// and the rest of the request will be ignored.
	OldestClean string
}

// bloomFilterResponse is the response to a bloomFilterRequest.
type bloomFilterResponse struct {
	// Current index being written to.
	CurrentIdx uint64
	// Oldest index in the returned bloom filter.
	OldestIdx uint64
	// Newest Index in the returned bloom filter.
	NewestIdx uint64
	// Are all indexes between oldest and newest filled?
	Complete bool
	// Binary data of the bloom filter.
	Filter []byte
}

// ObjectPathUpdated indicates a path has been updated.
// The path is marked dirty on the package-level tracker directly, so the call
// returns once the current bloom filter has been updated.
// It does nothing when no package-level tracker is set.
func ObjectPathUpdated(s string) {
	if intDataUpdateTracker != nil {
		intDataUpdateTracker.markDirty(s)
	}
}