github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/internal/testutil/testutil.go

github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/internal/testutil/testutil.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package testutil
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"math/rand"
    13  	"runtime"
    14  	"sort"
    15  	"testing"
    16  	"time"
    17  
    18  	"github.com/Schaudge/grailbase/errors"
    19  	"github.com/Schaudge/grailbase/file"
    20  	"github.com/Schaudge/grailbase/ioctx"
    21  	"github.com/Schaudge/grailbase/traverse"
    22  	"github.com/grailbio/testutil/assert"
    23  )
    24  
    25  func doRead(t *testing.T, r io.Reader, len int) string {
    26  	data := make([]byte, len)
    27  	n, err := io.ReadFull(r, data)
    28  	assert.EQ(t, len, n)
    29  	if err == io.EOF {
    30  		assert.EQ(t, 0, n)
    31  	} else {
    32  		assert.NoError(t, err)
    33  	}
    34  	return string(data)
    35  }
    36  
    37  func doReadAll(t *testing.T, r io.Reader) string {
    38  	data, err := ioutil.ReadAll(r)
    39  	assert.NoError(t, err)
    40  	return string(data)
    41  }
    42  
    43  func doSeek(t *testing.T, r io.Seeker, off int64, whence int) {
    44  	n, err := r.Seek(off, whence)
    45  	assert.NoError(t, err)
    46  	if whence == io.SeekStart {
    47  		assert.EQ(t, n, off)
    48  	}
    49  }
    50  
    51  func doReadFile(ctx context.Context, t *testing.T, impl file.Implementation, path string) string {
    52  	f, err := impl.Open(ctx, path)
    53  	assert.NoError(t, err, "open: %v", path)
    54  	data := doReadAll(t, f.Reader(ctx))
    55  	assert.NoError(t, f.Close(ctx))
    56  	return data
    57  }
    58  
    59  func doWriteFile(ctx context.Context, t *testing.T, impl file.Implementation, path string, data string) {
    60  	f, err := impl.Create(ctx, path)
    61  	assert.NoError(t, err, "create: %v", path)
    62  	w := f.Writer(ctx)
    63  	_, err = w.Write([]byte(data))
    64  	assert.NoError(t, err)
    65  	assert.NoError(t, f.Close(ctx))
    66  }
    67  
    68  func fileExists(ctx context.Context, impl file.Implementation, path string) bool {
    69  	_, err := impl.Stat(ctx, path)
    70  	if err != nil && !errors.Is(errors.NotExist, err) {
    71  		panic(err)
    72  	}
    73  	return err == nil
    74  }
    75  
    76  // TestEmpty creates an empty file and tests its operations.
    77  func TestEmpty(
    78  	ctx context.Context,
    79  	t *testing.T,
    80  	impl file.Implementation,
    81  	path string) {
    82  	f, err := impl.Create(ctx, path)
    83  	assert.NoError(t, err)
    84  	assert.NoError(t, err)
    85  	assert.NoError(t, f.Close(ctx))
    86  
    87  	f, err = impl.Open(ctx, path)
    88  	assert.NoError(t, err)
    89  	assert.EQ(t, "", doReadAll(t, f.Reader(ctx)))
    90  	assert.NoError(t, f.Close(ctx))
    91  
    92  	// Seek past the end of the file.
    93  	f, err = impl.Open(ctx, path)
    94  	assert.NoError(t, err)
    95  	r := f.Reader(ctx)
    96  	off, err := r.Seek(10, io.SeekStart)
    97  	assert.NoError(t, err)
    98  	assert.EQ(t, int64(10), off)
    99  	assert.EQ(t, "", doReadAll(t, f.Reader(ctx)))
   100  	assert.NoError(t, f.Close(ctx))
   101  }
   102  
   103  // TestNotExist tests that the implementation behaves correctly
   104  // for paths that do not exist.
   105  func TestNotExist(
   106  	ctx context.Context,
   107  	t *testing.T,
   108  	impl file.Implementation,
   109  	path string) {
   110  	_, err := impl.Open(ctx, path)
   111  	assert.True(t, errors.Is(errors.NotExist, err))
   112  	_, err = impl.Stat(ctx, path)
   113  	assert.True(t, errors.Is(errors.NotExist, err))
   114  }
   115  
   116  // TestErrors tests handling of errors. "path" shouldn't exist.
   117  func TestErrors(
   118  	ctx context.Context,
   119  	t *testing.T,
   120  	impl file.Implementation,
   121  	path string) {
   122  	_, err := impl.Stat(ctx, path)
   123  	assert.NotNil(t, err)
   124  	f, err := impl.Open(ctx, path)
   125  	if err == nil {
   126  		// S3 allows opening an non-existent file. But Stat or any other operation
   127  		// for such a file fails.
   128  		_, err := f.Stat(ctx)
   129  		t.Logf("errortest %s: stat error %v", path, err)
   130  		assert.NotNil(t, err)
   131  		assert.NoError(t, f.Close(ctx))
   132  	}
   133  }
   134  
   135  // TestReads tests various combination of reads and seeks.
   136  func TestReads(
   137  	ctx context.Context,
   138  	t *testing.T,
   139  	impl file.Implementation,
   140  	path string) {
   141  	expected := "A purple fox jumped over a blue cat"
   142  	doWriteFile(ctx, t, impl, path, expected)
   143  
   144  	// Read everything.
   145  	f, err := impl.Open(ctx, path)
   146  	assert.NoError(t, err)
   147  	assert.EQ(t, expected, doReadAll(t, f.Reader(ctx)))
   148  
   149  	// Read in two chunks.
   150  	r := f.Reader(ctx)
   151  	doSeek(t, r, 0, io.SeekStart)
   152  	assert.EQ(t, expected[:3], doRead(t, r, 3))
   153  	assert.EQ(t, expected[3:], doReadAll(t, r))
   154  
   155  	// Stat
   156  	stat, err := f.Stat(ctx)
   157  	assert.NoError(t, err)
   158  	assert.EQ(t, int64(len(expected)), stat.Size())
   159  
   160  	// Reading again should provide no data, since the seek pointer is at the end.
   161  	r = f.Reader(ctx)
   162  	assert.EQ(t, "", doReadAll(t, r))
   163  	doSeek(t, r, 3, io.SeekStart)
   164  	assert.EQ(t, expected[3:], doReadAll(t, r))
   165  
   166  	// Read bytes 4-7.
   167  	doSeek(t, r, 4, io.SeekStart)
   168  	assert.EQ(t, expected[4:7], doRead(t, r, 3))
   169  
   170  	// Seek beyond the end of the file.
   171  	doSeek(t, r, int64(len(expected)+1), io.SeekStart)
   172  	assert.EQ(t, "", doReadAll(t, r))
   173  
   174  	// Seek to the beginning.
   175  	doSeek(t, r, 0, io.SeekStart)
   176  	assert.EQ(t, expected, doReadAll(t, r))
   177  
   178  	// Seek twice to the same offset
   179  	doSeek(t, r, 1, io.SeekStart)
   180  	doSeek(t, r, 1, io.SeekStart)
   181  	assert.EQ(t, expected[1:], doReadAll(t, r))
   182  
   183  	doSeek(t, r, 8, io.SeekStart)
   184  	doSeek(t, r, -6, io.SeekCurrent)
   185  	assert.EQ(t, "purple", doRead(t, r, 6))
   186  
   187  	doSeek(t, r, -3, io.SeekEnd)
   188  	assert.EQ(t, "cat", doReadAll(t, r))
   189  }
   190  
   191  // TestWrites tests file Write functions.
   192  func TestWrites(ctx context.Context, t *testing.T, impl file.Implementation, dir string) {
   193  	path := dir + "/tmp.txt"
   194  	_ = impl.Remove(ctx, path)
   195  
   196  	f, err := impl.Create(ctx, path)
   197  	assert.NoError(t, err)
   198  	assert.EQ(t, f.Name(), path)
   199  	w := f.Writer(ctx)
   200  	n, err := w.Write([]byte("writetest"))
   201  	assert.NoError(t, err)
   202  	assert.EQ(t, n, 9)
   203  
   204  	// The file shouldn't exist before we call Close.
   205  	assert.False(t, fileExists(ctx, impl, path), "write %v", path)
   206  	// After close, the file becomes visible.
   207  	assert.NoError(t, f.Close(ctx))
   208  	assert.True(t, fileExists(ctx, impl, path), "write %v", path)
   209  
   210  	// Read the file back.
   211  	assert.EQ(t, doReadFile(ctx, t, impl, path), "writetest")
   212  
   213  	// Overwrite the file
   214  	f, err = impl.Create(ctx, path)
   215  	assert.NoError(t, err)
   216  	w = f.Writer(ctx)
   217  	n, err = w.Write([]byte("anotherwrite"))
   218  	assert.NoError(t, err)
   219  	assert.EQ(t, n, 12)
   220  
   221  	// Before closing, the file should store old contents
   222  	assert.EQ(t, doReadFile(ctx, t, impl, path), "writetest")
   223  
   224  	// On close, the file is updated to the new contents.
   225  	assert.NoError(t, f.Close(ctx))
   226  	assert.EQ(t, doReadFile(ctx, t, impl, path), "anotherwrite")
   227  }
   228  
   229  func TestDiscard(ctx context.Context, t *testing.T, impl file.Implementation, dir string) {
   230  	path := dir + "/tmp.txt"
   231  	_ = impl.Remove(ctx, path)
   232  
   233  	f, err := impl.Create(ctx, path)
   234  	assert.NoError(t, err)
   235  	w := f.Writer(ctx)
   236  	_, err = w.Write([]byte("writetest"))
   237  	assert.NoError(t, err)
   238  
   239  	// Discard, and then make sure it doesn't exist.
   240  	f.Discard(ctx)
   241  	if fileExists(ctx, impl, path) {
   242  		t.Errorf("path %s exists after call to discard", path)
   243  	}
   244  }
   245  
   246  // TestRemove tests file Remove() function.
   247  func TestRemove(ctx context.Context, t *testing.T, impl file.Implementation, path string) {
   248  	doWriteFile(ctx, t, impl, path, "removetest")
   249  	assert.True(t, fileExists(ctx, impl, path))
   250  	assert.NoError(t, impl.Remove(ctx, path))
   251  	assert.False(t, fileExists(ctx, impl, path))
   252  }
   253  
   254  // TestStat tests Stat method implementations.
   255  func TestStat(ctx context.Context, t *testing.T, impl file.Implementation, path string) {
   256  	// {min,max}ModTime define the range of reasonable modtime for the test file.
   257  	// We allow for 1 minute slack to account for clock skew on the file server.
   258  	minModTime := time.Now().Add(-60 * time.Second)
   259  	doWriteFile(ctx, t, impl, path, "stattest0")
   260  
   261  	dir := path + "dir"
   262  	doWriteFile(ctx, t, impl, dir+"/file", "stattest1")
   263  	maxModTime := time.Now().Add(60 * time.Second)
   264  
   265  	f, err := impl.Open(ctx, path)
   266  	assert.NoError(t, err)
   267  	info, err := f.Stat(ctx)
   268  	assert.NoError(t, f.Close(ctx))
   269  
   270  	assert.NoError(t, err)
   271  	assert.EQ(t, int64(9), info.Size())
   272  	assert.True(t, info.ModTime().After(minModTime) && info.ModTime().Before(maxModTime),
   273  		"Info: %+v, min %+v, max %+v", info.ModTime(), minModTime, maxModTime)
   274  
   275  	info2, err := impl.Stat(ctx, path)
   276  	assert.NoError(t, err)
   277  	assert.EQ(t, info, info2)
   278  
   279  	// Stat on directory is not supported.
   280  	_, err = impl.Stat(ctx, dir)
   281  	assert.NotNil(t, err)
   282  }
   283  
   284  type dirEntry struct {
   285  	path string
   286  	size int64
   287  }
   288  
   289  // TestList tests List implementations.
   290  func TestList(ctx context.Context, t *testing.T, impl file.Implementation, dir string) {
   291  	doList := func(prefix string) (ents []dirEntry) {
   292  		lister := impl.List(ctx, prefix, true)
   293  		for lister.Scan() {
   294  			ents = append(ents, dirEntry{lister.Path(), lister.Info().Size()})
   295  		}
   296  		sort.Slice(ents, func(i, j int) bool { return ents[i].path < ents[j].path })
   297  		return
   298  	}
   299  	doWriteFile(ctx, t, impl, dir+"/f0.txt", "f0")
   300  	doWriteFile(ctx, t, impl, dir+"/g0.txt", "g12")
   301  	doWriteFile(ctx, t, impl, dir+"/d0.txt", "d0e1")
   302  	doWriteFile(ctx, t, impl, dir+"/d0/f2.txt", "d0/f23")
   303  	doWriteFile(ctx, t, impl, dir+"/d0/d1/f3.txt", "d0/f345")
   304  
   305  	assert.EQ(t, []dirEntry{
   306  		dirEntry{dir + "/f0.txt", 2},
   307  	}, doList(dir+"/f0.txt"))
   308  
   309  	assert.EQ(t, []dirEntry{
   310  		dirEntry{dir + "/d0.txt", 4},
   311  		dirEntry{dir + "/d0/d1/f3.txt", 7},
   312  		dirEntry{dir + "/d0/f2.txt", 6},
   313  		dirEntry{dir + "/f0.txt", 2},
   314  		dirEntry{dir + "/g0.txt", 3},
   315  	}, doList(dir))
   316  
   317  	// List only lists files under the given directory.
   318  	// So listing "d0" should exclude d0.txt.
   319  	assert.EQ(t, []dirEntry{
   320  		dirEntry{dir + "/d0/d1/f3.txt", 7},
   321  		dirEntry{dir + "/d0/f2.txt", 6},
   322  	}, doList(dir+"/d0"))
   323  	assert.EQ(t, []dirEntry{
   324  		dirEntry{dir + "/d0/d1/f3.txt", 7},
   325  		dirEntry{dir + "/d0/f2.txt", 6},
   326  	}, doList(dir+"/d0/"))
   327  }
   328  
   329  // TestListDir tests ListDir implementations.
   330  func TestListDir(ctx context.Context, t *testing.T, impl file.Implementation, dir string) {
   331  	doList := func(prefix string) (ents []dirEntry) {
   332  		lister := impl.List(ctx, prefix, false)
   333  		for lister.Scan() {
   334  			de := dirEntry{lister.Path(), 0}
   335  			if !lister.IsDir() {
   336  				de.size = lister.Info().Size()
   337  			}
   338  			ents = append(ents, de)
   339  		}
   340  		sort.Slice(ents, func(i, j int) bool { return ents[i].path < ents[j].path })
   341  		return
   342  	}
   343  	doWriteFile(ctx, t, impl, dir+"/f0.txt", "f0")
   344  	doWriteFile(ctx, t, impl, dir+"/g0.txt", "g12")
   345  	doWriteFile(ctx, t, impl, dir+"/d0.txt", "d0e1")
   346  	doWriteFile(ctx, t, impl, dir+"/d0/f2.txt", "d0/f23")
   347  	doWriteFile(ctx, t, impl, dir+"/d0/d1/f3.txt", "d0/f345")
   348  
   349  	assert.EQ(t, []dirEntry{
   350  		dirEntry{dir + "/d0", 0},
   351  		dirEntry{dir + "/d0.txt", 4},
   352  		dirEntry{dir + "/f0.txt", 2},
   353  		dirEntry{dir + "/g0.txt", 3},
   354  	}, doList(dir))
   355  
   356  	// List only lists files under the given directory.
   357  	// So listing "d0" should exclude d0.txt.
   358  	assert.EQ(t, []dirEntry{
   359  		dirEntry{dir + "/d0/d1", 0},
   360  		dirEntry{dir + "/d0/f2.txt", 6},
   361  	}, doList(dir+"/d0"))
   362  	assert.EQ(t, []dirEntry{
   363  		dirEntry{dir + "/d0/d1", 0},
   364  		dirEntry{dir + "/d0/f2.txt", 6},
   365  	}, doList(dir+"/d0/"))
   366  }
   367  
   368  // TestStandard runs tests for all of the standard file API functionality.
   369  func TestStandard(ctx context.Context, t *testing.T, impl file.Implementation, dir string) {
   370  	iName := impl.String()
   371  
   372  	t.Run(iName+"_Empty", func(t *testing.T) { TestEmpty(ctx, t, impl, dir+"/empty.txt") })
   373  	t.Run(iName+"_NotExist", func(t *testing.T) { TestNotExist(ctx, t, impl, dir+"/notexist.txt") })
   374  	t.Run(iName+"_Errors", func(t *testing.T) { TestErrors(ctx, t, impl, dir+"/errors.txt") })
   375  	t.Run(iName+"_Reads", func(t *testing.T) { TestReads(ctx, t, impl, dir+"/reads.txt") })
   376  	t.Run(iName+"_Writes", func(t *testing.T) { TestWrites(ctx, t, impl, dir+"/writes") })
   377  	t.Run(iName+"_Discard", func(t *testing.T) { TestDiscard(ctx, t, impl, dir+"/discard") })
   378  	t.Run(iName+"_Remove", func(t *testing.T) { TestRemove(ctx, t, impl, dir+"/remove.txt") })
   379  	t.Run(iName+"_Stat", func(t *testing.T) { TestStat(ctx, t, impl, dir+"/stat.txt") })
   380  	t.Run(iName+"_List", func(t *testing.T) { TestList(ctx, t, impl, dir+"/match") })
   381  	t.Run(iName+"_ListDir", func(t *testing.T) { TestListDir(ctx, t, impl, dir+"/dirmatch") })
   382  }
   383  
   384  // TestConcurrentOffsetReads tests arbitrarily-ordered, concurrent reads.
   385  func TestConcurrentOffsetReads(
   386  	ctx context.Context,
   387  	t *testing.T,
   388  	impl file.Implementation,
   389  	path string,
   390  ) {
   391  	expected := "A purple fox jumped over a blue cat"
   392  	doWriteFile(ctx, t, impl, path, expected)
   393  
   394  	parallelism := runtime.NumCPU()
   395  	const readsPerShard = 1024
   396  
   397  	f, err := impl.Open(ctx, path)
   398  	assert.NoError(t, err)
   399  
   400  	rnds := make([]*rand.Rand, parallelism)
   401  	rnds[0] = rand.New(rand.NewSource(1))
   402  	for i := 1; i < len(rnds); i++ {
   403  		rnds[i] = rand.New(rand.NewSource(rnds[0].Int63()))
   404  	}
   405  
   406  	assert.NoError(t, traverse.Limit(parallelism).Each(parallelism, func(shard int) (err error) {
   407  		rnd := rnds[shard]
   408  		for i := 0; i < readsPerShard; i++ {
   409  			start := rnd.Intn(len(expected))
   410  			limit := start + rnd.Intn(len(expected)+1-start)
   411  			got := make([]byte, limit-start)
   412  			rc := f.OffsetReader(int64(start))
   413  			defer errors.CleanUpCtx(ctx, rc.Close, &err)
   414  			_, err = io.ReadFull(ioctx.ToStdReader(ctx, rc), got)
   415  			if err != nil {
   416  				return err
   417  			}
   418  			if got, want := string(got), expected[start:limit]; got != want {
   419  				return fmt.Errorf("got: %s, want: %s", got, want)
   420  			}
   421  		}
   422  		return nil
   423  	}))
   424  
   425  	assert.NoError(t, f.Close(ctx))
   426  }