github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/disk_cache_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto/rand"
    21  	"fmt"
    22  	"io"
    23  	"io/fs"
    24  	mrand "math/rand"
    25  	"path/filepath"
    26  	"strconv"
    27  	"testing"
    28  
    29  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    30  	"github.com/stretchr/testify/assert"
    31  )
    32  
// TestDiskCache covers the basic DiskCache lifecycle against one shared
// temp directory: Update (write), idempotent re-Update, Read with both
// hit and miss entries, persistence across cache instances, and
// DeletePaths.
func TestDiskCache(t *testing.T) {
	dir := t.TempDir()
	ctx := context.Background()

	// counter: track how many cache files are physically written
	numWritten := 0
	ctx = OnDiskCacheWritten(ctx, func(path string, entry IOEntry) {
		numWritten++
	})

	// new
	cache, err := NewDiskCache(ctx, dir, 1<<20, nil)
	assert.Nil(t, err)

	// update
	testUpdate := func(cache *DiskCache) {
		vec := &IOVector{
			FilePath: "foo",
			Entries: []IOEntry{
				{
					Offset: 0,
					Size:   1,
					Data:   []byte("a"),
				},
				// no data
				{
					Offset: 98,
					Size:   0,
				},
				// size unknown
				{
					Offset: 9999,
					Size:   -1,
					Data:   []byte("abc"),
				},
			},
		}
		err = cache.Update(ctx, vec, false)
		assert.Nil(t, err)
	}
	testUpdate(cache)

	// exactly one physical write: presumably the zero-size and
	// unknown-size entries are not cacheable — confirm against
	// DiskCache.Update if this changes
	assert.Equal(t, 1, numWritten)

	// update again: entry already cached, so no further physical write
	testUpdate(cache)

	assert.Equal(t, 1, numWritten)

	// read
	testRead := func(cache *DiskCache) {
		buf := new(bytes.Buffer)
		var r io.ReadCloser
		vec := &IOVector{
			FilePath: "foo",
			Entries: []IOEntry{
				// written data; also read through the WriterForRead and
				// ReadCloserForRead sinks
				{
					Offset:            0,
					Size:              1,
					WriterForRead:     buf,
					ReadCloserForRead: &r,
				},
				// not exists
				{
					Offset: 1,
					Size:   1,
				},
				// bad offset
				{
					Offset: 999,
					Size:   1,
				},
			},
		}
		err = cache.Read(ctx, vec)
		assert.Nil(t, err)
		assert.NotNil(t, r)
		defer r.Close()
		// the cached entry is satisfied via all three channels:
		// Entry.Data, WriterForRead, and ReadCloserForRead
		assert.True(t, vec.Entries[0].done)
		assert.Equal(t, []byte("a"), vec.Entries[0].Data)
		assert.Equal(t, []byte("a"), buf.Bytes())
		bs, err := io.ReadAll(r)
		assert.Nil(t, err)
		assert.Equal(t, []byte("a"), bs)
		// the uncached entries stay undone; Read does not error on them
		assert.False(t, vec.Entries[1].done)
		assert.False(t, vec.Entries[2].done)
	}
	testRead(cache)

	// read again
	testRead(cache)

	// a new cache instance over the same dir serves the persisted data
	cache, err = NewDiskCache(ctx, dir, 1<<20, nil)
	assert.Nil(t, err)
	testRead(cache)

	assert.Equal(t, 1, numWritten)

	// new cache instance and update: the file is already on disk, so
	// still no additional physical write
	cache, err = NewDiskCache(ctx, dir, 1<<20, nil)
	assert.Nil(t, err)
	testUpdate(cache)

	assert.Equal(t, 1, numWritten)

	// delete file
	err = cache.DeletePaths(ctx, []string{"foo"})
	assert.Nil(t, err)
}
   144  
   145  func TestDiskCacheWriteAgain(t *testing.T) {
   146  
   147  	dir := t.TempDir()
   148  	ctx := context.Background()
   149  	var counterSet perfcounter.CounterSet
   150  	ctx = perfcounter.WithCounterSet(ctx, &counterSet)
   151  
   152  	cache, err := NewDiskCache(ctx, dir, 4096, nil)
   153  	assert.Nil(t, err)
   154  
   155  	// update
   156  	err = cache.Update(ctx, &IOVector{
   157  		FilePath: "foo",
   158  		Entries: []IOEntry{
   159  			{
   160  				Size: 3,
   161  				Data: []byte("foo"),
   162  			},
   163  		},
   164  	}, false)
   165  	assert.Nil(t, err)
   166  	assert.Equal(t, int64(1), counterSet.FileService.Cache.Disk.WriteFile.Load())
   167  	assert.Equal(t, int64(0), counterSet.FileService.Cache.Disk.Evict.Load())
   168  
   169  	// update another entry
   170  	err = cache.Update(ctx, &IOVector{
   171  		FilePath: "foo",
   172  		Entries: []IOEntry{
   173  			{
   174  				Size:   3,
   175  				Data:   []byte("foo"),
   176  				Offset: 99,
   177  			},
   178  		},
   179  	}, false)
   180  	assert.Nil(t, err)
   181  	assert.Equal(t, int64(2), counterSet.FileService.Cache.Disk.WriteFile.Load())
   182  	assert.Equal(t, int64(1), counterSet.FileService.Cache.Disk.Evict.Load())
   183  
   184  	// update again, should write cache file again
   185  	err = cache.Update(ctx, &IOVector{
   186  		FilePath: "foo",
   187  		Entries: []IOEntry{
   188  			{
   189  				Size: 3,
   190  				Data: []byte("foo"),
   191  			},
   192  		},
   193  	}, false)
   194  	assert.Nil(t, err)
   195  	assert.Equal(t, int64(3), counterSet.FileService.Cache.Disk.WriteFile.Load())
   196  	assert.Equal(t, int64(2), counterSet.FileService.Cache.Disk.Evict.Load())
   197  
   198  	err = cache.Read(ctx, &IOVector{
   199  		FilePath: "foo",
   200  		Entries: []IOEntry{
   201  			{
   202  				Size: 3,
   203  			},
   204  		},
   205  	})
   206  	assert.Nil(t, err)
   207  	assert.Equal(t, int64(1), counterSet.FileService.Cache.Disk.Hit.Load())
   208  
   209  }
   210  
   211  func TestDiskCacheFileCache(t *testing.T) {
   212  	dir := t.TempDir()
   213  	ctx := context.Background()
   214  	cache, err := NewDiskCache(ctx, dir, 1<<20, nil)
   215  	assert.Nil(t, err)
   216  
   217  	vector := IOVector{
   218  		FilePath: "foo",
   219  		Entries: []IOEntry{
   220  			{
   221  				Offset: 0,
   222  				Size:   3,
   223  				Data:   []byte("foo"),
   224  			},
   225  			{
   226  				Offset: 3,
   227  				Size:   3,
   228  				Data:   []byte("bar"),
   229  			},
   230  		},
   231  	}
   232  
   233  	data, err := io.ReadAll(newIOEntriesReader(ctx, vector.Entries))
   234  	assert.Nil(t, err)
   235  	err = cache.SetFile(ctx, vector.FilePath, func(context.Context) (io.ReadCloser, error) {
   236  		return io.NopCloser(bytes.NewReader(data)), nil
   237  	})
   238  	assert.Nil(t, err)
   239  
   240  	readVector := &IOVector{
   241  		FilePath: "foo",
   242  		Entries: []IOEntry{
   243  			{
   244  				Offset: 0,
   245  				Size:   2,
   246  			},
   247  			{
   248  				Offset: 2,
   249  				Size:   2,
   250  			},
   251  			{
   252  				Offset: 4,
   253  				Size:   2,
   254  			},
   255  		},
   256  	}
   257  	err = cache.Read(ctx, readVector)
   258  	assert.Nil(t, err)
   259  	assert.Equal(t, []byte("fo"), readVector.Entries[0].Data)
   260  	assert.Equal(t, []byte("ob"), readVector.Entries[1].Data)
   261  	assert.Equal(t, []byte("ar"), readVector.Entries[2].Data)
   262  
   263  }
   264  
   265  func TestDiskCacheDirSize(t *testing.T) {
   266  	ctx := context.Background()
   267  	var counter perfcounter.CounterSet
   268  	ctx = perfcounter.WithCounterSet(ctx, &counter)
   269  
   270  	dir := t.TempDir()
   271  	capacity := 1 << 20
   272  	cache, err := NewDiskCache(ctx, dir, capacity, nil)
   273  	assert.Nil(t, err)
   274  
   275  	data := bytes.Repeat([]byte("a"), capacity/128)
   276  	for i := 0; i < capacity/len(data)*64; i++ {
   277  		err := cache.Update(ctx, &IOVector{
   278  			FilePath: fmt.Sprintf("%v", i),
   279  			Entries: []IOEntry{
   280  				{
   281  					Offset: 0,
   282  					Size:   int64(len(data)),
   283  					Data:   data,
   284  				},
   285  			},
   286  		}, false)
   287  		assert.Nil(t, err)
   288  		size := dirSize(dir)
   289  		assert.LessOrEqual(t, size, capacity)
   290  	}
   291  	assert.True(t, counter.FileService.Cache.Disk.Evict.Load() > 0)
   292  }
   293  
   294  func dirSize(path string) (ret int) {
   295  	if err := filepath.WalkDir(path, func(path string, d fs.DirEntry, err error) error {
   296  		if err != nil {
   297  			return err
   298  		}
   299  		if d.IsDir() {
   300  			return nil
   301  		}
   302  		stat, err := d.Info()
   303  		if err != nil {
   304  			return err
   305  		}
   306  		ret += int(fileSize(stat))
   307  		return nil
   308  	}); err != nil {
   309  		panic(err)
   310  	}
   311  	return
   312  }
   313  
   314  func benchmarkDiskCacheWriteThenRead(
   315  	b *testing.B,
   316  	size int64,
   317  ) {
   318  	b.Helper()
   319  
   320  	b.SetBytes(size)
   321  	data := bytes.Repeat([]byte("a"), int(size))
   322  
   323  	dir := b.TempDir()
   324  	ctx := context.Background()
   325  
   326  	cache, err := NewDiskCache(
   327  		ctx,
   328  		dir,
   329  		10<<30,
   330  		nil,
   331  	)
   332  	if err != nil {
   333  		b.Fatal(err)
   334  	}
   335  
   336  	b.ResetTimer()
   337  
   338  	b.RunParallel(func(pb *testing.PB) {
   339  		buf := make([]byte, 0, size)
   340  		for pb.Next() {
   341  
   342  			path := strconv.FormatInt(mrand.Int63(), 10)
   343  
   344  			// update
   345  			err := cache.Update(
   346  				ctx,
   347  				&IOVector{
   348  					FilePath: path,
   349  					Entries: []IOEntry{
   350  						{
   351  							Size: size,
   352  							Data: data,
   353  						},
   354  					},
   355  				},
   356  				false,
   357  			)
   358  			if err != nil {
   359  				b.Fatal(err)
   360  			}
   361  
   362  			// read
   363  			vec := &IOVector{
   364  				FilePath: path,
   365  				Entries: []IOEntry{
   366  					{
   367  						Size: size,
   368  						Data: buf,
   369  					},
   370  				},
   371  			}
   372  			err = cache.Read(
   373  				ctx,
   374  				vec,
   375  			)
   376  			if err != nil {
   377  				b.Fatal(err)
   378  			}
   379  			if !bytes.Equal(vec.Entries[0].Data, data) {
   380  				b.Fatal()
   381  			}
   382  
   383  		}
   384  	})
   385  
   386  }
   387  
   388  func BenchmarkDiskCacheWriteThenRead(b *testing.B) {
   389  	b.Run("4K", func(b *testing.B) {
   390  		benchmarkDiskCacheWriteThenRead(b, 4096)
   391  	})
   392  	b.Run("1M", func(b *testing.B) {
   393  		benchmarkDiskCacheWriteThenRead(b, 1<<20)
   394  	})
   395  	b.Run("16M", func(b *testing.B) {
   396  		benchmarkDiskCacheWriteThenRead(b, 16<<20)
   397  	})
   398  }
   399  
   400  func benchmarkDiskCacheReadRandomOffsetAtLargeFile(
   401  	b *testing.B,
   402  	fileSize int64,
   403  	readSize int64,
   404  ) {
   405  	b.Helper()
   406  
   407  	b.SetBytes(readSize)
   408  	data := make([]byte, fileSize)
   409  	_, err := rand.Read(data)
   410  	if err != nil {
   411  		b.Fatal(err)
   412  	}
   413  
   414  	dir := b.TempDir()
   415  	ctx := context.Background()
   416  
   417  	cache, err := NewDiskCache(
   418  		ctx,
   419  		dir,
   420  		8<<30,
   421  		nil,
   422  	)
   423  	if err != nil {
   424  		b.Fatal(err)
   425  	}
   426  
   427  	err = cache.SetFile(ctx, "foo", func(ctx context.Context) (io.ReadCloser, error) {
   428  		return io.NopCloser(bytes.NewReader(data)), nil
   429  	})
   430  	if err != nil {
   431  		b.Fatal(err)
   432  	}
   433  
   434  	b.ResetTimer()
   435  
   436  	b.RunParallel(func(pb *testing.PB) {
   437  		buf := make([]byte, 0, readSize)
   438  		for pb.Next() {
   439  
   440  			// read
   441  			offset := mrand.Intn(int(fileSize - readSize))
   442  			vec := &IOVector{
   443  				FilePath: "foo",
   444  				Entries: []IOEntry{
   445  					{
   446  						Offset: int64(offset),
   447  						Size:   readSize,
   448  						Data:   buf,
   449  					},
   450  				},
   451  			}
   452  			err = cache.Read(
   453  				ctx,
   454  				vec,
   455  			)
   456  			if err != nil {
   457  				b.Fatal(err)
   458  			}
   459  			if !bytes.Equal(vec.Entries[0].Data, data[offset:offset+int(readSize)]) {
   460  				b.Fatal()
   461  			}
   462  
   463  		}
   464  	})
   465  
   466  }
   467  
   468  func BenchmarkDiskCacheReadRandomOffsetAtLargeFile(b *testing.B) {
   469  	b.Run("4K", func(b *testing.B) {
   470  		benchmarkDiskCacheReadRandomOffsetAtLargeFile(b, 1<<30, 4096)
   471  	})
   472  	b.Run("1M", func(b *testing.B) {
   473  		benchmarkDiskCacheReadRandomOffsetAtLargeFile(b, 1<<30, 1<<20)
   474  	})
   475  	b.Run("16M", func(b *testing.B) {
   476  		benchmarkDiskCacheReadRandomOffsetAtLargeFile(b, 1<<30, 16<<20)
   477  	})
   478  }
   479  
   480  func BenchmarkDiskCacheMultipleIOEntries(b *testing.B) {
   481  	dir := b.TempDir()
   482  	ctx := context.Background()
   483  
   484  	cache, err := NewDiskCache(
   485  		ctx,
   486  		dir,
   487  		8<<30,
   488  		nil,
   489  	)
   490  	if err != nil {
   491  		b.Fatal(err)
   492  	}
   493  
   494  	err = cache.SetFile(ctx, "foo", func(ctx context.Context) (io.ReadCloser, error) {
   495  		return io.NopCloser(bytes.NewReader(bytes.Repeat([]byte("a"), 1<<20))), nil
   496  	})
   497  	if err != nil {
   498  		b.Fatal(err)
   499  	}
   500  
   501  	b.ResetTimer()
   502  
   503  	for i := 0; i < b.N; i++ {
   504  		var entries []IOEntry
   505  		for i := 0; i < 64; i++ {
   506  			entries = append(entries, IOEntry{
   507  				Offset: 0,
   508  				Size:   4096,
   509  			})
   510  		}
   511  		var counter perfcounter.CounterSet
   512  		ctx := perfcounter.WithCounterSet(ctx, &counter)
   513  		err := cache.Read(
   514  			ctx,
   515  			&IOVector{
   516  				FilePath: "foo",
   517  				Entries:  entries,
   518  			},
   519  		)
   520  		if err != nil {
   521  			b.Fatal(err)
   522  		}
   523  		numOpen := counter.FileService.Cache.Disk.OpenFullFile.Load()
   524  		if numOpen != 1 {
   525  			b.Fatal()
   526  		}
   527  	}
   528  }