github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/data-usage_test.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"encoding/json"
    24  	"fmt"
    25  	"os"
    26  	"path"
    27  	"path/filepath"
    28  	"testing"
    29  )
    30  
// usageTestFile describes one fixture file that the data usage tests create
// on disk via createUsageTestFiles.
type usageTestFile struct {
	name string // path of the file, joined below the base directory and bucket
	size int    // number of (zero) bytes written to the file
}
    35  
    36  func TestDataUsageUpdate(t *testing.T) {
    37  	base := t.TempDir()
    38  	const bucket = "bucket"
    39  	files := []usageTestFile{
    40  		{name: "rootfile", size: 10000},
    41  		{name: "rootfile2", size: 10000},
    42  		{name: "dir1/d1file", size: 2000},
    43  		{name: "dir2/d2file", size: 300},
    44  		{name: "dir1/dira/dafile", size: 100000},
    45  		{name: "dir1/dira/dbfile", size: 200000},
    46  		{name: "dir1/dira/dirasub/dcfile", size: 1000000},
    47  		{name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10},
    48  	}
    49  	createUsageTestFiles(t, base, bucket, files)
    50  
    51  	getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
    52  		if item.Typ&os.ModeDir == 0 {
    53  			var s os.FileInfo
    54  			s, err = os.Stat(item.Path)
    55  			if err != nil {
    56  				return
    57  			}
    58  			sizeS.totalSize = s.Size()
    59  			sizeS.versions++
    60  			return sizeS, nil
    61  		}
    62  		return
    63  	}
    64  
    65  	weSleep := func() bool { return false }
    66  
    67  	got, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
    68  	if err != nil {
    69  		t.Fatal(err)
    70  	}
    71  
    72  	// Test dirs
    73  	want := []struct {
    74  		path       string
    75  		isNil      bool
    76  		size, objs int
    77  		flatten    bool
    78  		oSizes     sizeHistogram
    79  	}{
    80  		{
    81  			path:    "/",
    82  			size:    1322310,
    83  			flatten: true,
    84  			objs:    8,
    85  			oSizes:  sizeHistogram{0: 2, 1: 3, 2: 2, 4: 1},
    86  		},
    87  		{
    88  			path:   "/",
    89  			size:   20000,
    90  			objs:   2,
    91  			oSizes: sizeHistogram{1: 2},
    92  		},
    93  		{
    94  			path:   "/dir1",
    95  			size:   1302010,
    96  			objs:   5,
    97  			oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1},
    98  		},
    99  		{
   100  			path:  "/dir1/dira",
   101  			isNil: true,
   102  		},
   103  		{
   104  			path:  "/nonexistying",
   105  			isNil: true,
   106  		},
   107  	}
   108  
   109  	for _, w := range want {
   110  		p := path.Join(bucket, w.path)
   111  		t.Run(p, func(t *testing.T) {
   112  			e := got.find(p)
   113  			if w.isNil {
   114  				if e != nil {
   115  					t.Error("want nil, got", e)
   116  				}
   117  				return
   118  			}
   119  			if e == nil {
   120  				t.Fatal("got nil result")
   121  			}
   122  			if w.flatten {
   123  				*e = got.flatten(*e)
   124  			}
   125  			if e.Size != int64(w.size) {
   126  				t.Error("got size", e.Size, "want", w.size)
   127  			}
   128  			if e.Objects != uint64(w.objs) {
   129  				t.Error("got objects", e.Objects, "want", w.objs)
   130  			}
   131  			if e.Versions != uint64(w.objs) {
   132  				t.Error("got versions", e.Versions, "want", w.objs)
   133  			}
   134  			if e.ObjSizes != w.oSizes {
   135  				t.Error("got histogram", e.ObjSizes, "want", w.oSizes)
   136  			}
   137  		})
   138  	}
   139  
   140  	files = []usageTestFile{
   141  		{
   142  			name: "newfolder/afile",
   143  			size: 4,
   144  		},
   145  		{
   146  			name: "newfolder/anotherone",
   147  			size: 1,
   148  		},
   149  		{
   150  			name: "newfolder/anemptyone",
   151  			size: 0,
   152  		},
   153  		{
   154  			name: "dir1/fileindir1",
   155  			size: 20000,
   156  		},
   157  		{
   158  			name: "dir1/dirc/fileindirc",
   159  			size: 20000,
   160  		},
   161  		{
   162  			name: "rootfile3",
   163  			size: 1000,
   164  		},
   165  		{
   166  			name: "dir1/dira/dirasub/fileindira2",
   167  			size: 200,
   168  		},
   169  	}
   170  	createUsageTestFiles(t, base, bucket, files)
   171  	err = os.RemoveAll(filepath.Join(base, bucket, "dir1/dira/dirasub/dcfile"))
   172  	if err != nil {
   173  		t.Fatal(err)
   174  	}
   175  	// Changed dir must be picked up in this many cycles.
   176  	for i := 0; i < dataUsageUpdateDirCycles; i++ {
   177  		got, err = scanDataFolder(context.Background(), nil, base, got, getSize, 0, weSleep)
   178  		got.Info.NextCycle++
   179  		if err != nil {
   180  			t.Fatal(err)
   181  		}
   182  	}
   183  
   184  	want = []struct {
   185  		path       string
   186  		isNil      bool
   187  		size, objs int
   188  		flatten    bool
   189  		oSizes     sizeHistogram
   190  	}{
   191  		{
   192  			path:    "/",
   193  			size:    363515,
   194  			flatten: true,
   195  			objs:    14,
   196  			oSizes:  sizeHistogram{0: 7, 1: 5, 2: 2},
   197  		},
   198  		{
   199  			path:    "/dir1",
   200  			size:    342210,
   201  			objs:    7,
   202  			flatten: false,
   203  			oSizes:  sizeHistogram{0: 2, 1: 3, 2: 2},
   204  		},
   205  		{
   206  			path:   "/newfolder",
   207  			size:   5,
   208  			objs:   3,
   209  			oSizes: sizeHistogram{0: 3},
   210  		},
   211  		{
   212  			path:  "/nonexistying",
   213  			isNil: true,
   214  		},
   215  	}
   216  
   217  	for _, w := range want {
   218  		p := path.Join(bucket, w.path)
   219  		t.Run(p, func(t *testing.T) {
   220  			e := got.find(p)
   221  			if w.isNil {
   222  				if e != nil {
   223  					t.Error("want nil, got", e)
   224  				}
   225  				return
   226  			}
   227  			if e == nil {
   228  				t.Fatal("got nil result")
   229  			}
   230  			if w.flatten {
   231  				*e = got.flatten(*e)
   232  			}
   233  			if e.Size != int64(w.size) {
   234  				t.Error("got size", e.Size, "want", w.size)
   235  			}
   236  			if e.Objects != uint64(w.objs) {
   237  				t.Error("got objects", e.Objects, "want", w.objs)
   238  			}
   239  			if e.Versions != uint64(w.objs) {
   240  				t.Error("got versions", e.Versions, "want", w.objs)
   241  			}
   242  			if e.ObjSizes != w.oSizes {
   243  				t.Error("got histogram", e.ObjSizes, "want", w.oSizes)
   244  			}
   245  		})
   246  	}
   247  }
   248  
   249  func TestDataUsageUpdatePrefix(t *testing.T) {
   250  	base := t.TempDir()
   251  	scannerSleeper.Update(0, 0)
   252  	files := []usageTestFile{
   253  		{name: "bucket/rootfile", size: 10000},
   254  		{name: "bucket/rootfile2", size: 10000},
   255  		{name: "bucket/dir1/d1file", size: 2000},
   256  		{name: "bucket/dir2/d2file", size: 300},
   257  		{name: "bucket/dir1/dira/dafile", size: 100000},
   258  		{name: "bucket/dir1/dira/dbfile", size: 200000},
   259  		{name: "bucket/dir1/dira/dirasub/dcfile", size: 1000000},
   260  		{name: "bucket/dir1/dira/dirasub/sublevel3/dccccfile", size: 10},
   261  	}
   262  	createUsageTestFiles(t, base, "", files)
   263  	const foldersBelow = 3
   264  	const filesBelowT = dataScannerCompactLeastObject / 2
   265  	const filesAboveT = dataScannerCompactAtFolders + 1
   266  	const expectSize = foldersBelow*filesBelowT + filesAboveT
   267  
   268  	generateUsageTestFiles(t, base, "bucket/dirwithalot", foldersBelow, filesBelowT, 1)
   269  	generateUsageTestFiles(t, base, "bucket/dirwithevenmore", filesAboveT, 1, 1)
   270  
   271  	getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
   272  		if item.Typ&os.ModeDir == 0 {
   273  			var s os.FileInfo
   274  			s, err = os.Stat(item.Path)
   275  			if err != nil {
   276  				return
   277  			}
   278  			sizeS.totalSize = s.Size()
   279  			sizeS.versions++
   280  			return
   281  		}
   282  		return
   283  	}
   284  
   285  	weSleep := func() bool { return false }
   286  
   287  	got, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize, 0, weSleep)
   288  	if err != nil {
   289  		t.Fatal(err)
   290  	}
   291  	if got.root() == nil {
   292  		t.Log("cached folders:")
   293  		for folder := range got.Cache {
   294  			t.Log("folder:", folder)
   295  		}
   296  		t.Fatal("got nil root.")
   297  	}
   298  
   299  	// Test dirs
   300  	want := []struct {
   301  		path       string
   302  		isNil      bool
   303  		size, objs int
   304  		oSizes     sizeHistogram
   305  	}{
   306  		{
   307  			path:   "flat",
   308  			size:   1322310 + expectSize,
   309  			objs:   8 + expectSize,
   310  			oSizes: sizeHistogram{0: 2 + expectSize, 1: 3, 2: 2, 4: 1},
   311  		},
   312  		{
   313  			path:   "bucket/",
   314  			size:   20000,
   315  			objs:   2,
   316  			oSizes: sizeHistogram{1: 2},
   317  		},
   318  		{
   319  			// Gets compacted...
   320  			path:   "bucket/dir1",
   321  			size:   1302010,
   322  			objs:   5,
   323  			oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1},
   324  		},
   325  		{
   326  			// Gets compacted at this level...
   327  			path:   "bucket/dirwithalot/0",
   328  			size:   filesBelowT,
   329  			objs:   filesBelowT,
   330  			oSizes: sizeHistogram{0: filesBelowT},
   331  		},
   332  		{
   333  			// Gets compacted at this level (below obj threshold)...
   334  			path:   "bucket/dirwithalot/0",
   335  			size:   filesBelowT,
   336  			objs:   filesBelowT,
   337  			oSizes: sizeHistogram{0: filesBelowT},
   338  		},
   339  		{
   340  			// Gets compacted at this level...
   341  			path:   "bucket/dirwithevenmore",
   342  			size:   filesAboveT,
   343  			objs:   filesAboveT,
   344  			oSizes: sizeHistogram{0: filesAboveT},
   345  		},
   346  		{
   347  			path:  "bucket/nonexistying",
   348  			isNil: true,
   349  		},
   350  	}
   351  
   352  	for _, w := range want {
   353  		t.Run(w.path, func(t *testing.T) {
   354  			e := got.find(w.path)
   355  			if w.path == "flat" {
   356  				f := got.flatten(*got.root())
   357  				e = &f
   358  			}
   359  			if w.isNil {
   360  				if e != nil {
   361  					t.Error("want nil, got", e)
   362  				}
   363  				return
   364  			}
   365  			if e == nil {
   366  				t.Fatal("got nil result")
   367  				return
   368  			}
   369  			if e.Size != int64(w.size) {
   370  				t.Error("got size", e.Size, "want", w.size)
   371  			}
   372  			if e.Objects != uint64(w.objs) {
   373  				t.Error("got objects", e.Objects, "want", w.objs)
   374  			}
   375  			if e.Versions != uint64(w.objs) {
   376  				t.Error("got versions", e.Versions, "want", w.objs)
   377  			}
   378  			if e.ObjSizes != w.oSizes {
   379  				t.Error("got histogram", e.ObjSizes, "want", w.oSizes)
   380  			}
   381  		})
   382  	}
   383  
   384  	files = []usageTestFile{
   385  		{
   386  			name: "bucket/newfolder/afile",
   387  			size: 4,
   388  		},
   389  		{
   390  			name: "bucket/newfolder/anotherone",
   391  			size: 1,
   392  		},
   393  		{
   394  			name: "bucket/newfolder/anemptyone",
   395  			size: 0,
   396  		},
   397  		{
   398  			name: "bucket/dir1/fileindir1",
   399  			size: 20000,
   400  		},
   401  		{
   402  			name: "bucket/dir1/dirc/fileindirc",
   403  			size: 20000,
   404  		},
   405  		{
   406  			name: "bucket/rootfile3",
   407  			size: 1000,
   408  		},
   409  		{
   410  			name: "bucket/dir1/dira/dirasub/fileindira2",
   411  			size: 200,
   412  		},
   413  	}
   414  
   415  	createUsageTestFiles(t, base, "", files)
   416  	err = os.RemoveAll(filepath.Join(base, "bucket/dir1/dira/dirasub/dcfile"))
   417  	if err != nil {
   418  		t.Fatal(err)
   419  	}
   420  	// Changed dir must be picked up in this many cycles.
   421  	for i := 0; i < dataUsageUpdateDirCycles; i++ {
   422  		got, err = scanDataFolder(context.Background(), nil, base, got, getSize, 0, weSleep)
   423  		got.Info.NextCycle++
   424  		if err != nil {
   425  			t.Fatal(err)
   426  		}
   427  	}
   428  
   429  	want = []struct {
   430  		path       string
   431  		isNil      bool
   432  		size, objs int
   433  		oSizes     sizeHistogram
   434  	}{
   435  		{
   436  			path:   "flat",
   437  			size:   363515 + expectSize,
   438  			objs:   14 + expectSize,
   439  			oSizes: sizeHistogram{0: 7 + expectSize, 1: 5, 2: 2},
   440  		},
   441  		{
   442  			path:   "bucket/dir1",
   443  			size:   342210,
   444  			objs:   7,
   445  			oSizes: sizeHistogram{0: 2, 1: 3, 2: 2},
   446  		},
   447  		{
   448  			path:   "bucket/",
   449  			size:   21000,
   450  			objs:   3,
   451  			oSizes: sizeHistogram{0: 1, 1: 2},
   452  		},
   453  		{
   454  			path:   "bucket/newfolder",
   455  			size:   5,
   456  			objs:   3,
   457  			oSizes: sizeHistogram{0: 3},
   458  		},
   459  		{
   460  			// Compacted into bucket/dir1
   461  			path:  "bucket/dir1/dira",
   462  			isNil: true,
   463  		},
   464  		{
   465  			path:  "bucket/nonexistying",
   466  			isNil: true,
   467  		},
   468  	}
   469  
   470  	for _, w := range want {
   471  		t.Run(w.path, func(t *testing.T) {
   472  			e := got.find(w.path)
   473  			if w.path == "flat" {
   474  				f := got.flatten(*got.root())
   475  				e = &f
   476  			}
   477  			if w.isNil {
   478  				if e != nil {
   479  					t.Error("want nil, got", e)
   480  				}
   481  				return
   482  			}
   483  			if e == nil {
   484  				t.Error("got nil result")
   485  				return
   486  			}
   487  			if e.Size != int64(w.size) {
   488  				t.Error("got size", e.Size, "want", w.size)
   489  			}
   490  			if e.Objects != uint64(w.objs) {
   491  				t.Error("got objects", e.Objects, "want", w.objs)
   492  			}
   493  			if e.Versions != uint64(w.objs) {
   494  				t.Error("got versions", e.Versions, "want", w.objs)
   495  			}
   496  			if e.ObjSizes != w.oSizes {
   497  				t.Error("got histogram", e.ObjSizes, "want", w.oSizes)
   498  			}
   499  		})
   500  	}
   501  }
   502  
   503  func createUsageTestFiles(t *testing.T, base, bucket string, files []usageTestFile) {
   504  	for _, f := range files {
   505  		err := os.MkdirAll(filepath.Dir(filepath.Join(base, bucket, f.name)), os.ModePerm)
   506  		if err != nil {
   507  			t.Fatal(err)
   508  		}
   509  		err = os.WriteFile(filepath.Join(base, bucket, f.name), make([]byte, f.size), os.ModePerm)
   510  		if err != nil {
   511  			t.Fatal(err)
   512  		}
   513  	}
   514  }
   515  
   516  // generateUsageTestFiles create nFolders * nFiles files of size bytes each.
   517  func generateUsageTestFiles(t *testing.T, base, bucket string, nFolders, nFiles, size int) {
   518  	pl := make([]byte, size)
   519  	for i := 0; i < nFolders; i++ {
   520  		name := filepath.Join(base, bucket, fmt.Sprint(i), "0.txt")
   521  		err := os.MkdirAll(filepath.Dir(name), os.ModePerm)
   522  		if err != nil {
   523  			t.Fatal(err)
   524  		}
   525  		for j := 0; j < nFiles; j++ {
   526  			name := filepath.Join(base, bucket, fmt.Sprint(i), fmt.Sprint(j)+".txt")
   527  			err = os.WriteFile(name, pl, os.ModePerm)
   528  			if err != nil {
   529  				t.Fatal(err)
   530  			}
   531  		}
   532  	}
   533  }
   534  
   535  func TestDataUsageCacheSerialize(t *testing.T) {
   536  	base := t.TempDir()
   537  	const bucket = "abucket"
   538  	files := []usageTestFile{
   539  		{name: "rootfile", size: 10000},
   540  		{name: "rootfile2", size: 10000},
   541  		{name: "dir1/d1file", size: 2000},
   542  		{name: "dir2/d2file", size: 300},
   543  		{name: "dir2/d2file2", size: 300},
   544  		{name: "dir2/d2file3/", size: 300},
   545  		{name: "dir2/d2file4/", size: 300},
   546  		{name: "dir2/d2file5", size: 300},
   547  		{name: "dir1/dira/dafile", size: 100000},
   548  		{name: "dir1/dira/dbfile", size: 200000},
   549  		{name: "dir1/dira/dirasub/dcfile", size: 1000000},
   550  		{name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10},
   551  		{name: "dir1/dira/dirasub/sublevel3/dccccfile20", size: 20},
   552  		{name: "dir1/dira/dirasub/sublevel3/dccccfile30", size: 30},
   553  		{name: "dir1/dira/dirasub/sublevel3/dccccfile40", size: 40},
   554  	}
   555  	createUsageTestFiles(t, base, bucket, files)
   556  
   557  	getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
   558  		if item.Typ&os.ModeDir == 0 {
   559  			var s os.FileInfo
   560  			s, err = os.Stat(item.Path)
   561  			if err != nil {
   562  				return
   563  			}
   564  			sizeS.versions++
   565  			sizeS.totalSize = s.Size()
   566  			return
   567  		}
   568  		return
   569  	}
   570  	weSleep := func() bool { return false }
   571  	want, err := scanDataFolder(context.Background(), nil, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
   572  	if err != nil {
   573  		t.Fatal(err)
   574  	}
   575  	e := want.find("abucket/dir2")
   576  	e.ReplicationStats = &replicationAllStats{
   577  		Targets: map[string]replicationStats{
   578  			"arn": {
   579  				PendingSize:    1,
   580  				ReplicatedSize: 2,
   581  				FailedSize:     3,
   582  				FailedCount:    5,
   583  				PendingCount:   6,
   584  			},
   585  		},
   586  	}
   587  	want.replace("abucket/dir2", "", *e)
   588  	var buf bytes.Buffer
   589  	err = want.serializeTo(&buf)
   590  	if err != nil {
   591  		t.Fatal(err)
   592  	}
   593  	t.Log("serialized size:", buf.Len(), "bytes")
   594  	var got dataUsageCache
   595  	err = got.deserialize(&buf)
   596  	if err != nil {
   597  		t.Fatal(err)
   598  	}
   599  	if got.Info.LastUpdate.IsZero() {
   600  		t.Error("lastupdate not set")
   601  	}
   602  
   603  	if !want.Info.LastUpdate.Equal(got.Info.LastUpdate) {
   604  		t.Fatalf("deserialize LastUpdate mismatch\nwant: %+v\ngot:  %+v", want, got)
   605  	}
   606  	if len(want.Cache) != len(got.Cache) {
   607  		t.Errorf("deserialize mismatch length\nwant: %+v\ngot:  %+v", len(want.Cache), len(got.Cache))
   608  	}
   609  	for wkey, wval := range want.Cache {
   610  		gotv := got.Cache[wkey]
   611  		if !equalAsJSON(gotv, wval) {
   612  			t.Errorf("deserialize mismatch, key %v\nwant: %#v\ngot:  %#v", wkey, wval, gotv)
   613  		}
   614  	}
   615  }
   616  
   617  // equalAsJSON returns whether the values are equal when encoded as JSON.
   618  func equalAsJSON(a, b interface{}) bool {
   619  	aj, err := json.Marshal(a)
   620  	if err != nil {
   621  		panic(err)
   622  	}
   623  	bj, err := json.Marshal(b)
   624  	if err != nil {
   625  		panic(err)
   626  	}
   627  	return bytes.Equal(aj, bj)
   628  }