github.com/grailbio/base@v0.0.11/file/addfs/unzipfs/unzipfs_test.go (about)

     1  package unzipfs
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"context"
     7  	"io"
     8  	"log"
     9  	"strings"
    10  	"sync"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/google/go-cmp/cmp"
    15  	"github.com/grailbio/base/file/fsnode"
    16  	. "github.com/grailbio/base/file/fsnode/fsnodetesting"
    17  	"github.com/grailbio/base/ioctx"
    18  	"github.com/grailbio/base/ioctx/fsctx"
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  func TestParent(t *testing.T) {
    24  	ctx := context.Background()
    25  	baseTime := time.Unix(1_600_000_000, 0)
    26  
    27  	var zipBytes bytes.Buffer
    28  	zipW := zip.NewWriter(&zipBytes)
    29  
    30  	a0Info := fsnode.NewRegInfo("0.txt").WithModTime(baseTime).WithModePerm(0600)
    31  	a0Content := "a0"
    32  	addFile(t, zipW, "a/", &a0Info, a0Content, true)
    33  
    34  	a00Info := fsnode.NewRegInfo("0.exe").WithModTime(baseTime.Add(time.Hour)).WithModePerm(0755)
    35  	a00Content := "a00"
    36  	addFile(t, zipW, "a/0/", &a00Info, a00Content, true)
    37  
    38  	b0Info := fsnode.NewRegInfo("0.txt").WithModTime(baseTime.Add(2 * time.Hour)).WithModePerm(0644)
    39  	b0Content := "b0"
    40  	addFile(t, zipW, "b/", &b0Info, b0Content, false)
    41  
    42  	topInfo := fsnode.NewRegInfo("0.txt").WithModTime(baseTime.Add(3 * time.Hour)).WithModePerm(0600)
    43  	topContent := "top"
    44  	addFile(t, zipW, "", &topInfo, topContent, false)
    45  
    46  	require.NoError(t, zipW.Close())
    47  
    48  	parentInfo := fsnode.NewDirInfo("unzip")
    49  	parent, err := parentFromLeaf(ctx, parentInfo, fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), zipBytes.Bytes()))
    50  	require.NotNil(t, parent)
    51  	require.NoError(t, err)
    52  
    53  	walker := Walker{Info: true}
    54  	diff := cmp.Diff(
    55  		InfoT{parentInfo, Parent{
    56  			"a": InfoT{
    57  				fsnode.NewDirInfo("a"),
    58  				Parent{
    59  					a0Info.Name(): InfoT{a0Info, Leaf([]byte(a0Content))},
    60  					"0": InfoT{
    61  						fsnode.NewDirInfo("0"),
    62  						Parent{
    63  							a00Info.Name(): InfoT{a00Info, Leaf([]byte(a00Content))},
    64  						},
    65  					},
    66  				},
    67  			},
    68  			"b": InfoT{
    69  				fsnode.NewDirInfo("b"),
    70  				Parent{
    71  					b0Info.Name(): InfoT{b0Info, Leaf([]byte(b0Content))},
    72  				},
    73  			},
    74  			topInfo.Name(): InfoT{topInfo, Leaf([]byte(topContent))},
    75  		}},
    76  		walker.WalkContents(ctx, t, parent),
    77  		cmp.Comparer(func(a, b fsnode.FileInfo) bool {
    78  			a, b = a.WithSys(nil), b.WithSys(nil)
    79  			return a.Equal(b)
    80  		}),
    81  	)
    82  	assert.Empty(t, diff)
    83  }
    84  
    85  func addFile(t *testing.T, zipW *zip.Writer, prefix string, info *fsnode.FileInfo, content string, flate bool) {
    86  	*info = info.WithSize(int64(len(content)))
    87  	hdr, err := zip.FileInfoHeader(*info)
    88  	hdr.Name = prefix + info.Name()
    89  	if flate {
    90  		hdr.Method = zip.Deflate
    91  	}
    92  	require.NoError(t, err)
    93  	fw, err := zipW.CreateHeader(hdr)
    94  	require.NoError(t, err)
    95  	_, err = io.Copy(fw, strings.NewReader(content))
    96  	require.NoError(t, err)
    97  }
    98  
    99  func TestNonZip(t *testing.T) {
   100  	ctx := context.Background()
   101  	parent, err := parentFromLeaf(ctx,
   102  		fsnode.NewDirInfo("unzip"),
   103  		fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), []byte("not zip")))
   104  	require.NoError(t, err)
   105  	require.Nil(t, parent)
   106  }
   107  
// TestReadCancel verifies that reading an unzipped file respects context
// cancellation even while the underlying zip leaf's ReadAt is blocked.
// It wraps the zip bytes in a pausingLeaf so the test controls exactly when
// underlying ReadAt calls may proceed.
func TestReadCancel(t *testing.T) {
	ctx := context.Background()

	var zipBytes bytes.Buffer
	zipW := zip.NewWriter(&zipBytes)

	fInfo := fsnode.NewRegInfo("f.txt")
	// We need to make sure our reads below will exceed internal buffer sizes so we can control
	// underlying blocking. Empirically this seems big enough but it may need to increase if
	// there are internal changes (in flate, etc.) in the future.
	fContent := strings.Repeat("a", 50*1024*1024)
	addFile(t, zipW, "", &fInfo, fContent, true)

	require.NoError(t, zipW.Close())

	// First we allow unblocked reads for zip headers.
	// (readAtWaitTwiceC is still nil here, so parentFromLeaf's header reads don't block.)
	zipLeaf := pausingLeaf{Leaf: fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), zipBytes.Bytes())}
	parent, err := parentFromLeaf(ctx, fsnode.NewDirInfo("unzip"), &zipLeaf)
	require.NoError(t, err)
	require.NotNil(t, parent)
	children, err := fsnode.IterateAll(ctx, parent.Children())
	require.NoError(t, err)
	require.Equal(t, 1, len(children))
	fLeaf := children[0].(fsnode.Leaf)

	f, err := fsnode.Open(ctx, fLeaf)
	require.NoError(t, err)

	// Set up read blocking: from now on each ReadAt must receive two values from waitC.
	waitC := make(chan struct{})
	zipLeaf.mu.Lock()
	zipLeaf.readAtWaitTwiceC = waitC
	zipLeaf.mu.Unlock()

	var n int
	b := make([]byte, 2)
	readC := make(chan struct{}) // closed when the background Read returns
	go func() {
		defer close(readC)
		n, err = f.Read(ctx, b)
	}()
	waitC <- struct{}{} // Let the read go through.
	waitC <- struct{}{}
	<-readC
	// Receiving from the closed readC orders our reads of n/err/b after the
	// goroutine's writes, so this is race-free.
	require.NoError(t, err)
	require.Equal(t, 2, n)
	require.Equal(t, fContent[:2], string(b))

	// Now start another read and let it reach ReadAt (first waitC send).
	ctxCancel, cancel := context.WithCancel(ctx)
	readC = make(chan struct{})
	go func() {
		defer close(readC)
		// Make sure this read will exhaust internal buffers (in flate, etc.) forcing a read from
		// the pausingFile we control.
		_, err = io.ReadAll(ioctx.ToStdReader(ctxCancel, f))
	}()
	waitC <- struct{}{}
	cancel() // Cancel the context instead of letting the read finish.
	<-readC
	// Make sure we get a cancellation error.
	require.ErrorIs(t, err, context.Canceled)
}
   171  
// pausingLeaf wraps a Leaf so the Files it returns (from OpenFile) can be made
// to block: when readAtWaitTwiceC is non-nil, each ReadAt receives two values
// from it (aborting on context cancellation) before delegating to the
// underlying file. When the channel is nil, ReadAt does not block.
type pausingLeaf struct {
	fsnode.Leaf
	mu sync.Mutex // mu guards readAtWaitTwiceC
	// readAtWaitTwiceC controls ReadAt's blocking. If non-nil, ReadAt will read two values from
	// this channel before returning.
	readAtWaitTwiceC <-chan struct{}
}
   181  
// FSNodeT marks pausingLeaf as an fsnode implementation.
func (*pausingLeaf) FSNodeT() {}
   183  func (p *pausingLeaf) OpenFile(ctx context.Context, flag int) (fsctx.File, error) {
   184  	f, err := fsnode.Open(ctx, p.Leaf)
   185  	return pausingFile{p, f}, err
   186  }
   187  
// pausingFile wraps an open file and consults its originating pausingLeaf's
// readAtWaitTwiceC before each ReadAt, letting tests pause underlying reads.
type pausingFile struct {
	leaf *pausingLeaf // source of the gating channel
	fsctx.File
}
   192  
   193  func (p pausingFile) ReadAt(ctx context.Context, dst []byte, off int64) (n int, err error) {
   194  	p.leaf.mu.Lock()
   195  	waitC := p.leaf.readAtWaitTwiceC
   196  	p.leaf.mu.Unlock()
   197  	if waitC != nil {
   198  		for i := 0; i < 2; i++ {
   199  			log.Printf("pausing: waiting %d", i)
   200  			select {
   201  			case <-waitC:
   202  			case <-ctx.Done():
   203  				return 0, ctx.Err()
   204  			}
   205  		}
   206  	} else {
   207  		log.Printf("pausing: nil")
   208  	}
   209  	return p.File.(ioctx.ReaderAt).ReadAt(ctx, dst, off)
   210  }