github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/data-usage_test.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "bytes" 22 "context" 23 "encoding/json" 24 "fmt" 25 "os" 26 "path" 27 "path/filepath" 28 "testing" 29 ) 30 31 type usageTestFile struct { 32 name string 33 size int 34 } 35 36 func TestDataUsageUpdate(t *testing.T) { 37 base := t.TempDir() 38 const bucket = "bucket" 39 files := []usageTestFile{ 40 {name: "rootfile", size: 10000}, 41 {name: "rootfile2", size: 10000}, 42 {name: "dir1/d1file", size: 2000}, 43 {name: "dir2/d2file", size: 300}, 44 {name: "dir1/dira/dafile", size: 100000}, 45 {name: "dir1/dira/dbfile", size: 200000}, 46 {name: "dir1/dira/dirasub/dcfile", size: 1000000}, 47 {name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, 48 } 49 createUsageTestFiles(t, base, bucket, files) 50 51 getSize := func(item scannerItem) (sizeS sizeSummary, err error) { 52 if item.Typ&os.ModeDir == 0 { 53 var s os.FileInfo 54 s, err = os.Stat(item.Path) 55 if err != nil { 56 return 57 } 58 sizeS.totalSize = s.Size() 59 sizeS.versions++ 60 return sizeS, nil 61 } 62 return 63 } 64 65 weSleep := func() bool { return false } 66 67 got, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep) 68 if err != nil { 69 t.Fatal(err) 70 } 71 72 // Test dirs 73 want := []struct { 74 path string 75 isNil bool 76 size, objs int 77 flatten bool 78 oSizes sizeHistogram 79 }{ 80 { 81 path: "/", 82 size: 1322310, 83 flatten: true, 84 objs: 8, 85 oSizes: sizeHistogram{0: 2, 1: 3, 2: 2, 4: 1}, 86 }, 87 { 88 path: "/", 89 size: 20000, 90 objs: 2, 91 oSizes: sizeHistogram{1: 2}, 92 }, 93 { 94 path: "/dir1", 95 size: 1302010, 96 objs: 5, 97 oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1}, 98 }, 99 { 100 path: "/dir1/dira", 101 isNil: true, 102 }, 103 { 104 path: "/nonexistying", 105 isNil: true, 106 }, 107 } 108 109 for _, w := range want { 110 p := path.Join(bucket, w.path) 111 t.Run(p, func(t *testing.T) { 112 e := got.find(p) 113 if w.isNil { 114 if e != nil { 115 t.Error("want nil, got", e) 116 } 117 return 118 } 119 if e == nil { 120 t.Fatal("got nil result") 121 } 122 if w.flatten { 123 *e = got.flatten(*e) 124 } 125 if e.Size != int64(w.size) { 126 t.Error("got size", e.Size, "want", w.size) 127 } 128 if e.Objects != uint64(w.objs) { 129 t.Error("got objects", e.Objects, "want", w.objs) 130 } 131 if e.Versions != uint64(w.objs) { 132 t.Error("got versions", e.Versions, "want", w.objs) 133 } 134 if e.ObjSizes != w.oSizes { 135 t.Error("got histogram", e.ObjSizes, "want", w.oSizes) 136 } 137 }) 138 } 139 140 files = []usageTestFile{ 141 { 142 name: "newfolder/afile", 143 size: 4, 144 }, 145 { 146 name: "newfolder/anotherone", 147 size: 1, 148 }, 149 { 150 name: "newfolder/anemptyone", 151 size: 0, 152 }, 153 { 154 name: "dir1/fileindir1", 155 size: 20000, 156 }, 157 { 158 name: "dir1/dirc/fileindirc", 159 size: 20000, 160 }, 161 { 162 name: "rootfile3", 163 size: 1000, 164 }, 165 { 166 name: "dir1/dira/dirasub/fileindira2", 167 size: 200, 168 }, 169 } 170 createUsageTestFiles(t, base, bucket, files) 171 err = os.RemoveAll(filepath.Join(base, bucket, "dir1/dira/dirasub/dcfile")) 172 if err != nil { 173 t.Fatal(err) 174 } 175 // Changed dir must be picked up in this many cycles. 176 for i := 0; i < dataUsageUpdateDirCycles; i++ { 177 got, err = scanDataFolder(context.Background(), nil, base, got, getSize, 0, weSleep) 178 got.Info.NextCycle++ 179 if err != nil { 180 t.Fatal(err) 181 } 182 } 183 184 want = []struct { 185 path string 186 isNil bool 187 size, objs int 188 flatten bool 189 oSizes sizeHistogram 190 }{ 191 { 192 path: "/", 193 size: 363515, 194 flatten: true, 195 objs: 14, 196 oSizes: sizeHistogram{0: 7, 1: 5, 2: 2}, 197 }, 198 { 199 path: "/dir1", 200 size: 342210, 201 objs: 7, 202 flatten: false, 203 oSizes: sizeHistogram{0: 2, 1: 3, 2: 2}, 204 }, 205 { 206 path: "/newfolder", 207 size: 5, 208 objs: 3, 209 oSizes: sizeHistogram{0: 3}, 210 }, 211 { 212 path: "/nonexistying", 213 isNil: true, 214 }, 215 } 216 217 for _, w := range want { 218 p := path.Join(bucket, w.path) 219 t.Run(p, func(t *testing.T) { 220 e := got.find(p) 221 if w.isNil { 222 if e != nil { 223 t.Error("want nil, got", e) 224 } 225 return 226 } 227 if e == nil { 228 t.Fatal("got nil result") 229 } 230 if w.flatten { 231 *e = got.flatten(*e) 232 } 233 if e.Size != int64(w.size) { 234 t.Error("got size", e.Size, "want", w.size) 235 } 236 if e.Objects != uint64(w.objs) { 237 t.Error("got objects", e.Objects, "want", w.objs) 238 } 239 if e.Versions != uint64(w.objs) { 240 t.Error("got versions", e.Versions, "want", w.objs) 241 } 242 if e.ObjSizes != w.oSizes { 243 t.Error("got histogram", e.ObjSizes, "want", w.oSizes) 244 } 245 }) 246 } 247 } 248 249 func TestDataUsageUpdatePrefix(t *testing.T) { 250 base := t.TempDir() 251 scannerSleeper.Update(0, 0) 252 files := []usageTestFile{ 253 {name: "bucket/rootfile", size: 10000}, 254 {name: "bucket/rootfile2", size: 10000}, 255 {name: "bucket/dir1/d1file", size: 2000}, 256 {name: "bucket/dir2/d2file", size: 300}, 257 {name: "bucket/dir1/dira/dafile", size: 100000}, 258 {name: "bucket/dir1/dira/dbfile", size: 200000}, 259 {name: "bucket/dir1/dira/dirasub/dcfile", size: 1000000}, 260 {name: "bucket/dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, 261 } 262 createUsageTestFiles(t, base, "", files) 263 const foldersBelow = 3 264 const filesBelowT = dataScannerCompactLeastObject / 2 265 const filesAboveT = dataScannerCompactAtFolders + 1 266 const expectSize = foldersBelow*filesBelowT + filesAboveT 267 268 generateUsageTestFiles(t, base, "bucket/dirwithalot", foldersBelow, filesBelowT, 1) 269 generateUsageTestFiles(t, base, "bucket/dirwithevenmore", filesAboveT, 1, 1) 270 271 getSize := func(item scannerItem) (sizeS sizeSummary, err error) { 272 if item.Typ&os.ModeDir == 0 { 273 var s os.FileInfo 274 s, err = os.Stat(item.Path) 275 if err != nil { 276 return 277 } 278 sizeS.totalSize = s.Size() 279 sizeS.versions++ 280 return 281 } 282 return 283 } 284 285 weSleep := func() bool { return false } 286 287 got, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize, 0, weSleep) 288 if err != nil { 289 t.Fatal(err) 290 } 291 if got.root() == nil { 292 t.Log("cached folders:") 293 for folder := range got.Cache { 294 t.Log("folder:", folder) 295 } 296 t.Fatal("got nil root.") 297 } 298 299 // Test dirs 300 want := []struct { 301 path string 302 isNil bool 303 size, objs int 304 oSizes sizeHistogram 305 }{ 306 { 307 path: "flat", 308 size: 1322310 + expectSize, 309 objs: 8 + expectSize, 310 oSizes: sizeHistogram{0: 2 + expectSize, 1: 3, 2: 2, 4: 1}, 311 }, 312 { 313 path: "bucket/", 314 size: 20000, 315 objs: 2, 316 oSizes: sizeHistogram{1: 2}, 317 }, 318 { 319 // Gets compacted... 320 path: "bucket/dir1", 321 size: 1302010, 322 objs: 5, 323 oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1}, 324 }, 325 { 326 // Gets compacted at this level... 327 path: "bucket/dirwithalot/0", 328 size: filesBelowT, 329 objs: filesBelowT, 330 oSizes: sizeHistogram{0: filesBelowT}, 331 }, 332 { 333 // Gets compacted at this level (below obj threshold)... 334 path: "bucket/dirwithalot/0", 335 size: filesBelowT, 336 objs: filesBelowT, 337 oSizes: sizeHistogram{0: filesBelowT}, 338 }, 339 { 340 // Gets compacted at this level... 341 path: "bucket/dirwithevenmore", 342 size: filesAboveT, 343 objs: filesAboveT, 344 oSizes: sizeHistogram{0: filesAboveT}, 345 }, 346 { 347 path: "bucket/nonexistying", 348 isNil: true, 349 }, 350 } 351 352 for _, w := range want { 353 t.Run(w.path, func(t *testing.T) { 354 e := got.find(w.path) 355 if w.path == "flat" { 356 f := got.flatten(*got.root()) 357 e = &f 358 } 359 if w.isNil { 360 if e != nil { 361 t.Error("want nil, got", e) 362 } 363 return 364 } 365 if e == nil { 366 t.Fatal("got nil result") 367 return 368 } 369 if e.Size != int64(w.size) { 370 t.Error("got size", e.Size, "want", w.size) 371 } 372 if e.Objects != uint64(w.objs) { 373 t.Error("got objects", e.Objects, "want", w.objs) 374 } 375 if e.Versions != uint64(w.objs) { 376 t.Error("got versions", e.Versions, "want", w.objs) 377 } 378 if e.ObjSizes != w.oSizes { 379 t.Error("got histogram", e.ObjSizes, "want", w.oSizes) 380 } 381 }) 382 } 383 384 files = []usageTestFile{ 385 { 386 name: "bucket/newfolder/afile", 387 size: 4, 388 }, 389 { 390 name: "bucket/newfolder/anotherone", 391 size: 1, 392 }, 393 { 394 name: "bucket/newfolder/anemptyone", 395 size: 0, 396 }, 397 { 398 name: "bucket/dir1/fileindir1", 399 size: 20000, 400 }, 401 { 402 name: "bucket/dir1/dirc/fileindirc", 403 size: 20000, 404 }, 405 { 406 name: "bucket/rootfile3", 407 size: 1000, 408 }, 409 { 410 name: "bucket/dir1/dira/dirasub/fileindira2", 411 size: 200, 412 }, 413 } 414 415 createUsageTestFiles(t, base, "", files) 416 err = os.RemoveAll(filepath.Join(base, "bucket/dir1/dira/dirasub/dcfile")) 417 if err != nil { 418 t.Fatal(err) 419 } 420 // Changed dir must be picked up in this many cycles. 421 for i := 0; i < dataUsageUpdateDirCycles; i++ { 422 got, err = scanDataFolder(context.Background(), nil, base, got, getSize, 0, weSleep) 423 got.Info.NextCycle++ 424 if err != nil { 425 t.Fatal(err) 426 } 427 } 428 429 want = []struct { 430 path string 431 isNil bool 432 size, objs int 433 oSizes sizeHistogram 434 }{ 435 { 436 path: "flat", 437 size: 363515 + expectSize, 438 objs: 14 + expectSize, 439 oSizes: sizeHistogram{0: 7 + expectSize, 1: 5, 2: 2}, 440 }, 441 { 442 path: "bucket/dir1", 443 size: 342210, 444 objs: 7, 445 oSizes: sizeHistogram{0: 2, 1: 3, 2: 2}, 446 }, 447 { 448 path: "bucket/", 449 size: 21000, 450 objs: 3, 451 oSizes: sizeHistogram{0: 1, 1: 2}, 452 }, 453 { 454 path: "bucket/newfolder", 455 size: 5, 456 objs: 3, 457 oSizes: sizeHistogram{0: 3}, 458 }, 459 { 460 // Compacted into bucket/dir1 461 path: "bucket/dir1/dira", 462 isNil: true, 463 }, 464 { 465 path: "bucket/nonexistying", 466 isNil: true, 467 }, 468 } 469 470 for _, w := range want { 471 t.Run(w.path, func(t *testing.T) { 472 e := got.find(w.path) 473 if w.path == "flat" { 474 f := got.flatten(*got.root()) 475 e = &f 476 } 477 if w.isNil { 478 if e != nil { 479 t.Error("want nil, got", e) 480 } 481 return 482 } 483 if e == nil { 484 t.Error("got nil result") 485 return 486 } 487 if e.Size != int64(w.size) { 488 t.Error("got size", e.Size, "want", w.size) 489 } 490 if e.Objects != uint64(w.objs) { 491 t.Error("got objects", e.Objects, "want", w.objs) 492 } 493 if e.Versions != uint64(w.objs) { 494 t.Error("got versions", e.Versions, "want", w.objs) 495 } 496 if e.ObjSizes != w.oSizes { 497 t.Error("got histogram", e.ObjSizes, "want", w.oSizes) 498 } 499 }) 500 } 501 } 502 503 func createUsageTestFiles(t *testing.T, base, bucket string, files []usageTestFile) { 504 for _, f := range files { 505 err := os.MkdirAll(filepath.Dir(filepath.Join(base, bucket, f.name)), os.ModePerm) 506 if err != nil { 507 t.Fatal(err) 508 } 509 err = os.WriteFile(filepath.Join(base, bucket, f.name), make([]byte, f.size), os.ModePerm) 510 if err != nil { 511 t.Fatal(err) 512 } 513 } 514 } 515 516 // generateUsageTestFiles create nFolders * nFiles files of size bytes each. 517 func generateUsageTestFiles(t *testing.T, base, bucket string, nFolders, nFiles, size int) { 518 pl := make([]byte, size) 519 for i := 0; i < nFolders; i++ { 520 name := filepath.Join(base, bucket, fmt.Sprint(i), "0.txt") 521 err := os.MkdirAll(filepath.Dir(name), os.ModePerm) 522 if err != nil { 523 t.Fatal(err) 524 } 525 for j := 0; j < nFiles; j++ { 526 name := filepath.Join(base, bucket, fmt.Sprint(i), fmt.Sprint(j)+".txt") 527 err = os.WriteFile(name, pl, os.ModePerm) 528 if err != nil { 529 t.Fatal(err) 530 } 531 } 532 } 533 } 534 535 func TestDataUsageCacheSerialize(t *testing.T) { 536 base := t.TempDir() 537 const bucket = "abucket" 538 files := []usageTestFile{ 539 {name: "rootfile", size: 10000}, 540 {name: "rootfile2", size: 10000}, 541 {name: "dir1/d1file", size: 2000}, 542 {name: "dir2/d2file", size: 300}, 543 {name: "dir2/d2file2", size: 300}, 544 {name: "dir2/d2file3/", size: 300}, 545 {name: "dir2/d2file4/", size: 300}, 546 {name: "dir2/d2file5", size: 300}, 547 {name: "dir1/dira/dafile", size: 100000}, 548 {name: "dir1/dira/dbfile", size: 200000}, 549 {name: "dir1/dira/dirasub/dcfile", size: 1000000}, 550 {name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, 551 {name: "dir1/dira/dirasub/sublevel3/dccccfile20", size: 20}, 552 {name: "dir1/dira/dirasub/sublevel3/dccccfile30", size: 30}, 553 {name: "dir1/dira/dirasub/sublevel3/dccccfile40", size: 40}, 554 } 555 createUsageTestFiles(t, base, bucket, files) 556 557 getSize := func(item scannerItem) (sizeS sizeSummary, err error) { 558 if item.Typ&os.ModeDir == 0 { 559 var s os.FileInfo 560 s, err = os.Stat(item.Path) 561 if err != nil { 562 return 563 } 564 sizeS.versions++ 565 sizeS.totalSize = s.Size() 566 return 567 } 568 return 569 } 570 weSleep := func() bool { return false } 571 want, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep) 572 if err != nil { 573 t.Fatal(err) 574 } 575 e := want.find("abucket/dir2") 576 e.ReplicationStats = &replicationAllStats{ 577 Targets: map[string]replicationStats{ 578 "arn": { 579 PendingSize: 1, 580 ReplicatedSize: 2, 581 FailedSize: 3, 582 FailedCount: 5, 583 PendingCount: 6, 584 }, 585 }, 586 } 587 want.replace("abucket/dir2", "", *e) 588 var buf bytes.Buffer 589 err = want.serializeTo(&buf) 590 if err != nil { 591 t.Fatal(err) 592 } 593 t.Log("serialized size:", buf.Len(), "bytes") 594 var got dataUsageCache 595 err = got.deserialize(&buf) 596 if err != nil { 597 t.Fatal(err) 598 } 599 if got.Info.LastUpdate.IsZero() { 600 t.Error("lastupdate not set") 601 } 602 603 if !want.Info.LastUpdate.Equal(got.Info.LastUpdate) { 604 t.Fatalf("deserialize LastUpdate mismatch\nwant: %+v\ngot: %+v", want, got) 605 } 606 if len(want.Cache) != len(got.Cache) { 607 t.Errorf("deserialize mismatch length\nwant: %+v\ngot: %+v", len(want.Cache), len(got.Cache)) 608 } 609 for wkey, wval := range want.Cache { 610 gotv := got.Cache[wkey] 611 if !equalAsJSON(gotv, wval) { 612 t.Errorf("deserialize mismatch, key %v\nwant: %#v\ngot: %#v", wkey, wval, gotv) 613 } 614 } 615 } 616 617 // equalAsJSON returns whether the values are equal when encoded as JSON. 618 func equalAsJSON(a, b interface{}) bool { 619 aj, err := json.Marshal(a) 620 if err != nil { 621 panic(err) 622 } 623 bj, err := json.Marshal(b) 624 if err != nil { 625 panic(err) 626 } 627 return bytes.Equal(aj, bj) 628 }