github.com/grailbio/base@v0.0.11/file/addfs/unzipfs/unzipfs_test.go (about) 1 package unzipfs 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "context" 7 "io" 8 "log" 9 "strings" 10 "sync" 11 "testing" 12 "time" 13 14 "github.com/google/go-cmp/cmp" 15 "github.com/grailbio/base/file/fsnode" 16 . "github.com/grailbio/base/file/fsnode/fsnodetesting" 17 "github.com/grailbio/base/ioctx" 18 "github.com/grailbio/base/ioctx/fsctx" 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 ) 22 23 func TestParent(t *testing.T) { 24 ctx := context.Background() 25 baseTime := time.Unix(1_600_000_000, 0) 26 27 var zipBytes bytes.Buffer 28 zipW := zip.NewWriter(&zipBytes) 29 30 a0Info := fsnode.NewRegInfo("0.txt").WithModTime(baseTime).WithModePerm(0600) 31 a0Content := "a0" 32 addFile(t, zipW, "a/", &a0Info, a0Content, true) 33 34 a00Info := fsnode.NewRegInfo("0.exe").WithModTime(baseTime.Add(time.Hour)).WithModePerm(0755) 35 a00Content := "a00" 36 addFile(t, zipW, "a/0/", &a00Info, a00Content, true) 37 38 b0Info := fsnode.NewRegInfo("0.txt").WithModTime(baseTime.Add(2 * time.Hour)).WithModePerm(0644) 39 b0Content := "b0" 40 addFile(t, zipW, "b/", &b0Info, b0Content, false) 41 42 topInfo := fsnode.NewRegInfo("0.txt").WithModTime(baseTime.Add(3 * time.Hour)).WithModePerm(0600) 43 topContent := "top" 44 addFile(t, zipW, "", &topInfo, topContent, false) 45 46 require.NoError(t, zipW.Close()) 47 48 parentInfo := fsnode.NewDirInfo("unzip") 49 parent, err := parentFromLeaf(ctx, parentInfo, fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), zipBytes.Bytes())) 50 require.NotNil(t, parent) 51 require.NoError(t, err) 52 53 walker := Walker{Info: true} 54 diff := cmp.Diff( 55 InfoT{parentInfo, Parent{ 56 "a": InfoT{ 57 fsnode.NewDirInfo("a"), 58 Parent{ 59 a0Info.Name(): InfoT{a0Info, Leaf([]byte(a0Content))}, 60 "0": InfoT{ 61 fsnode.NewDirInfo("0"), 62 Parent{ 63 a00Info.Name(): InfoT{a00Info, Leaf([]byte(a00Content))}, 64 }, 65 }, 66 }, 67 }, 68 "b": InfoT{ 69 fsnode.NewDirInfo("b"), 70 Parent{ 71 b0Info.Name(): InfoT{b0Info, Leaf([]byte(b0Content))}, 72 }, 73 }, 74 topInfo.Name(): InfoT{topInfo, Leaf([]byte(topContent))}, 75 }}, 76 walker.WalkContents(ctx, t, parent), 77 cmp.Comparer(func(a, b fsnode.FileInfo) bool { 78 a, b = a.WithSys(nil), b.WithSys(nil) 79 return a.Equal(b) 80 }), 81 ) 82 assert.Empty(t, diff) 83 } 84 85 func addFile(t *testing.T, zipW *zip.Writer, prefix string, info *fsnode.FileInfo, content string, flate bool) { 86 *info = info.WithSize(int64(len(content))) 87 hdr, err := zip.FileInfoHeader(*info) 88 hdr.Name = prefix + info.Name() 89 if flate { 90 hdr.Method = zip.Deflate 91 } 92 require.NoError(t, err) 93 fw, err := zipW.CreateHeader(hdr) 94 require.NoError(t, err) 95 _, err = io.Copy(fw, strings.NewReader(content)) 96 require.NoError(t, err) 97 } 98 99 func TestNonZip(t *testing.T) { 100 ctx := context.Background() 101 parent, err := parentFromLeaf(ctx, 102 fsnode.NewDirInfo("unzip"), 103 fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), []byte("not zip"))) 104 require.NoError(t, err) 105 require.Nil(t, parent) 106 } 107 108 func TestReadCancel(t *testing.T) { 109 ctx := context.Background() 110 111 var zipBytes bytes.Buffer 112 zipW := zip.NewWriter(&zipBytes) 113 114 fInfo := fsnode.NewRegInfo("f.txt") 115 // We need to make sure our reads below will exceed internal buffer sizes so we can control 116 // underlying blocking. Empirically this seems big enough but it may need to increase if 117 // there are internal changes (in flate, etc.) in the future. 118 fContent := strings.Repeat("a", 50*1024*1024) 119 addFile(t, zipW, "", &fInfo, fContent, true) 120 121 require.NoError(t, zipW.Close()) 122 123 // First we allow unblocked reads for zip headers. 124 zipLeaf := pausingLeaf{Leaf: fsnode.ConstLeaf(fsnode.NewRegInfo("zip"), zipBytes.Bytes())} 125 parent, err := parentFromLeaf(ctx, fsnode.NewDirInfo("unzip"), &zipLeaf) 126 require.NoError(t, err) 127 require.NotNil(t, parent) 128 children, err := fsnode.IterateAll(ctx, parent.Children()) 129 require.NoError(t, err) 130 require.Equal(t, 1, len(children)) 131 fLeaf := children[0].(fsnode.Leaf) 132 133 f, err := fsnode.Open(ctx, fLeaf) 134 require.NoError(t, err) 135 136 // Set up read blocking. 137 waitC := make(chan struct{}) 138 zipLeaf.mu.Lock() 139 zipLeaf.readAtWaitTwiceC = waitC 140 zipLeaf.mu.Unlock() 141 142 var n int 143 b := make([]byte, 2) 144 readC := make(chan struct{}) 145 go func() { 146 defer close(readC) 147 n, err = f.Read(ctx, b) 148 }() 149 waitC <- struct{}{} // Let the read go through. 150 waitC <- struct{}{} 151 <-readC 152 require.NoError(t, err) 153 require.Equal(t, 2, n) 154 require.Equal(t, fContent[:2], string(b)) 155 156 // Now start another read and let it reach ReadAt (first waitC send). 157 ctxCancel, cancel := context.WithCancel(ctx) 158 readC = make(chan struct{}) 159 go func() { 160 defer close(readC) 161 // Make sure this read will exhaust internal buffers (in flate, etc.) forcing a read from 162 // the pausingFile we control. 163 _, err = io.ReadAll(ioctx.ToStdReader(ctxCancel, f)) 164 }() 165 waitC <- struct{}{} 166 cancel() // Cancel the context instead of letting the read finish. 167 <-readC 168 // Make sure we get a cancellation error. 169 require.ErrorIs(t, err, context.Canceled) 170 } 171 172 // pausingLeaf returns Files (from Open) that read one item C (which may block) at the start of 173 // each ReadAt operation. If C is nil, ReadAt's don't block. 174 type pausingLeaf struct { 175 fsnode.Leaf 176 mu sync.Mutex // mu guards readAtWaitTwiceC 177 // readAtWaitTwiceC controls ReadAt's blocking. If non-nil, ReadAt will read two values from 178 // this channel before returning. 179 readAtWaitTwiceC <-chan struct{} 180 } 181 182 func (*pausingLeaf) FSNodeT() {} 183 func (p *pausingLeaf) OpenFile(ctx context.Context, flag int) (fsctx.File, error) { 184 f, err := fsnode.Open(ctx, p.Leaf) 185 return pausingFile{p, f}, err 186 } 187 188 type pausingFile struct { 189 leaf *pausingLeaf 190 fsctx.File 191 } 192 193 func (p pausingFile) ReadAt(ctx context.Context, dst []byte, off int64) (n int, err error) { 194 p.leaf.mu.Lock() 195 waitC := p.leaf.readAtWaitTwiceC 196 p.leaf.mu.Unlock() 197 if waitC != nil { 198 for i := 0; i < 2; i++ { 199 log.Printf("pausing: waiting %d", i) 200 select { 201 case <-waitC: 202 case <-ctx.Done(): 203 return 0, ctx.Err() 204 } 205 } 206 } else { 207 log.Printf("pausing: nil") 208 } 209 return p.File.(ioctx.ReaderAt).ReadAt(ctx, dst, off) 210 }