github.com/creachadair/ffs@v0.17.3/file/data_test.go (about) 1 // Copyright 2019 Michael J. Fromberger. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package file 16 17 import ( 18 "bytes" 19 "context" 20 "crypto/sha1" 21 "io" 22 "math/rand" 23 "strconv" 24 "strings" 25 "testing" 26 27 "github.com/creachadair/ffs/blob" 28 "github.com/creachadair/ffs/blob/memstore" 29 "github.com/creachadair/ffs/block" 30 "github.com/creachadair/ffs/file/wiretype" 31 "github.com/creachadair/mds/mapset" 32 "github.com/google/go-cmp/cmp" 33 "github.com/google/go-cmp/cmp/cmpopts" 34 "golang.org/x/crypto/blake2b" 35 ) 36 37 var cmpFileDataOpts = []cmp.Option{ 38 cmp.AllowUnexported(fileData{}, extent{}, cblock{}), 39 cmpopts.IgnoreFields(fileData{}, "sc"), 40 } 41 42 func TestIndex(t *testing.T) { 43 d := newDataTester(t, &block.SplitConfig{Min: 1024}) // in effect, "don't split" 44 45 type index struct { 46 totalBytes int64 47 extents []*extent 48 } 49 checkIndex := func(want index) { 50 // We have to tell cmp that it's OK to look at unexported fields on these types. 51 opt := cmp.AllowUnexported(index{}, extent{}, cblock{}) 52 got := index{totalBytes: d.fd.totalBytes, extents: d.fd.extents} 53 if diff := cmp.Diff(want, got, opt); diff != "" { 54 t.Errorf("Incorrect index (-want, +got)\n%s", diff) 55 } 56 } 57 58 // Write some discontiguous regions into the file and verify that the 59 // resulting index is correct. 60 d.checkString(0, 10, "") 61 62 d.writeString("foobar", 0) 63 d.checkString(0, 6, "foobar") 64 d.checkString(3, 6, "bar") 65 // foobar 66 67 d.writeString("foobar", 10) 68 d.checkString(10, 6, "foobar") 69 d.checkString(0, 16, "foobar\x00\x00\x00\x00foobar") 70 // foobar----foobar 71 72 d.writeString("aliquot", 20) 73 d.checkString(0, 100, "foobar\x00\x00\x00\x00foobar\x00\x00\x00\x00aliquot") 74 // foobar----foobar----aliquot 75 76 checkIndex(index{ 77 totalBytes: 27, 78 extents: []*extent{ 79 {base: 0, bytes: 6, blocks: []cblock{{6, hashOf("foobar")}}, starts: []int64{0}}, 80 {base: 10, bytes: 6, blocks: []cblock{{6, hashOf("foobar")}}, starts: []int64{10}}, 81 {base: 20, bytes: 7, blocks: []cblock{{7, hashOf("aliquot")}}, starts: []int64{20}}, 82 }, 83 }) 84 85 d.writeString("barbarossa", 3) 86 d.checkString(0, 100, "foobarbarossabar\x00\x00\x00\x00aliquot") 87 // foo..........bar----aliquot 88 // ^^^barbarossa^^^ preserved block contents outside overlap (^) 89 90 d.truncate(6) 91 // foobar 92 93 d.checkString(0, 16, "foobar") 94 checkIndex(index{ 95 totalBytes: 6, 96 extents: []*extent{ 97 {base: 0, bytes: 6, blocks: []cblock{{6, hashOf("foobar")}}, starts: []int64{0}}, 98 }, 99 }) 100 101 d.writeString("kinghell", 3) 102 d.checkString(0, 11, "fookinghell") 103 // fookinghell 104 105 checkIndex(index{ 106 totalBytes: 11, 107 extents: []*extent{ 108 {base: 0, bytes: 11, blocks: []cblock{{11, hashOf("fookinghell")}}, starts: []int64{0}}, 109 }, 110 }) 111 112 d.writeString("mate", 11) 113 d.checkString(0, 15, "fookinghellmate") 114 // fookinghellmate 115 116 checkIndex(index{ 117 totalBytes: 15, 118 extents: []*extent{ // these adjacent blocks should be merged (with no split) 119 {base: 0, bytes: 15, blocks: []cblock{{15, hashOf("fookinghellmate")}}, starts: []int64{0}}, 120 }, 121 }) 122 123 d.writeString("cor", 20) 124 d.checkString(0, 100, "fookinghellmate\x00\x00\x00\x00\x00cor") 125 // fookinghellmate-----cor 126 127 checkIndex(index{ 128 totalBytes: 23, 129 extents: []*extent{ 130 {base: 0, bytes: 15, blocks: []cblock{{15, hashOf("fookinghellmate")}}, starts: []int64{0}}, 131 {base: 20, bytes: 3, blocks: []cblock{{3, hashOf("cor")}}, starts: []int64{20}}, 132 }, 133 }) 134 135 d.writeString("THEEND", 30) 136 d.checkString(0, 100, "fookinghellmate\x00\x00\x00\x00\x00cor\x00\x00\x00\x00\x00\x00\x00THEEND") 137 checkIndex(index{ 138 totalBytes: 36, 139 extents: []*extent{ 140 {base: 0, bytes: 15, blocks: []cblock{{15, hashOf("fookinghellmate")}}, starts: []int64{0}}, 141 {base: 20, bytes: 3, blocks: []cblock{{3, hashOf("cor")}}, starts: []int64{20}}, 142 {base: 30, bytes: 6, blocks: []cblock{{6, hashOf("THEEND")}}, starts: []int64{30}}, 143 }, 144 }) 145 146 // Verify read boundary cases. 147 d.checkString(24, 3, "\x00\x00\x00") // entirely unstored 148 d.checkString(11, 6, "mate\x00\x00") // partly stored, partly unstored 149 d.checkString(25, 100, "\x00\x00\x00\x00\x00THEEND") // partly unstored, partly stored 150 d.checkString(18, 7, "\x00\x00cor\x00\x00") // unstored, stored, unstored 151 } 152 153 func TestWireEncoding(t *testing.T) { 154 155 t.Run("SingleBlock", func(t *testing.T) { 156 d := &fileData{totalBytes: 10, extents: []*extent{ 157 {bytes: 10, blocks: []cblock{{bytes: 10, key: "foo"}}}, 158 }} 159 idx := d.toWireType() 160 if idx.TotalBytes != 10 { 161 t.Errorf("Index total bytes: got %d, want 10", idx.TotalBytes) 162 } 163 if s := string(idx.Single); s != "foo" { 164 t.Errorf("Index single key: got %q, want foo", s) 165 } 166 if len(idx.Extents) != 0 { 167 t.Errorf("Index has %d extents, want 0", len(idx.Extents)) 168 } 169 170 dx := new(fileData) 171 if err := dx.fromWireType(idx); err != nil { 172 t.Errorf("Decoding index failed: %v", err) 173 } 174 if diff := cmp.Diff(*d, *dx, cmpFileDataOpts...); diff != "" { 175 t.Errorf("Wrong decoded block (-want, +got)\n%s", diff) 176 } 177 }) 178 179 t.Run("MultipleBlocks", func(t *testing.T) { 180 d := &fileData{totalBytes: 15, extents: []*extent{ 181 {bytes: 15, blocks: []cblock{ 182 {bytes: 10, key: "foo"}, 183 {bytes: 5, key: "bar"}, 184 }}, 185 }} 186 idx := d.toWireType() 187 if idx.TotalBytes != 15 { 188 t.Errorf("Index total bytes: got %d, want 15", idx.TotalBytes) 189 } 190 if len(idx.Single) != 0 { 191 t.Errorf("Index single key: got %q, want empty", string(idx.Single)) 192 } 193 if len(idx.Extents) != 1 || len(idx.Extents[0].Blocks) != 2 { 194 t.Errorf("Index extents=%d, blocks=%d; want 1, 2", 195 len(idx.Extents), len(idx.Extents[0].Blocks)) 196 } 197 198 dx := new(fileData) 199 if err := dx.fromWireType(idx); err != nil { 200 t.Errorf("Decoding index failed: %v", err) 201 } 202 if diff := cmp.Diff(d, dx, cmpFileDataOpts...); diff != "" { 203 t.Errorf("Wrong decoded block (-want, +got)\n%s", diff) 204 } 205 }) 206 207 t.Run("NormalizeMergesExtents", func(t *testing.T) { 208 idx := &wiretype.Index{ 209 TotalBytes: 10, 210 Extents: []*wiretype.Extent{ 211 {Base: 0, Bytes: 3, Blocks: []*wiretype.Block{{Bytes: 3, Key: []byte("1")}}}, 212 {Base: 3, Bytes: 7, Blocks: []*wiretype.Block{{Bytes: 7, Key: []byte("2")}}}, 213 }, 214 } 215 216 dx := new(fileData) 217 if err := dx.fromWireType(idx); err != nil { 218 t.Errorf("Decoding index failed: %v", err) 219 } 220 want := &fileData{ 221 totalBytes: 10, 222 extents: []*extent{ 223 {base: 0, bytes: 10, blocks: []cblock{{bytes: 3, key: "1"}, {bytes: 7, key: "2"}}}, 224 }, 225 } 226 if diff := cmp.Diff(want, dx, cmpFileDataOpts...); diff != "" { 227 t.Errorf("Wrong decoded block (-want, +got)\n%s", diff) 228 } 229 }) 230 231 t.Run("NormalizeDropsEmpty", func(t *testing.T) { 232 idx := &wiretype.Index{ 233 TotalBytes: 20, 234 Extents: []*wiretype.Extent{ 235 {Base: 0, Bytes: 0}, 236 {Base: 3, Bytes: 7, Blocks: []*wiretype.Block{{Bytes: 7, Key: []byte("X")}}}, 237 {Base: 12, Bytes: 0}, 238 {Base: 15, Bytes: 5, Blocks: []*wiretype.Block{{Bytes: 5, Key: []byte("Y")}}}, 239 {Base: 144, Bytes: 0}, 240 }, 241 } 242 243 dx := new(fileData) 244 if err := dx.fromWireType(idx); err != nil { 245 t.Errorf("Decoding index failed: %v", err) 246 } 247 want := &fileData{ 248 totalBytes: 20, 249 extents: []*extent{ 250 {base: 3, bytes: 7, blocks: []cblock{{bytes: 7, key: "X"}}}, 251 {base: 15, bytes: 5, blocks: []cblock{{bytes: 5, key: "Y"}}}, 252 }, 253 } 254 if diff := cmp.Diff(want, dx, cmpFileDataOpts...); diff != "" { 255 t.Errorf("Wrong decoded block (-want, +got)\n%s", diff) 256 } 257 }) 258 } 259 260 func TestWriteBlocking(t *testing.T) { 261 ti := newTestInput("\x00\x00\x00\x00foo\x00\x00\x00\x00|barf\x00\x00\x00|\x00\x00\x00bazzu") 262 d := newDataTester(t, &block.SplitConfig{ 263 Hasher: ti, Min: 5, Size: 16, Max: 100, 264 }) 265 d.writeString(ti.input, 0) 266 want := &fileData{ 267 totalBytes: int64(ti.inputLen()), 268 extents: []*extent{ 269 {base: 4, bytes: 3, blocks: []cblock{{bytes: 3, key: hashOf("foo")}}}, 270 {base: 11, bytes: 4, blocks: []cblock{{bytes: 4, key: hashOf("barf")}}}, 271 {base: 21, bytes: 5, blocks: []cblock{{bytes: 5, key: hashOf("bazzu")}}}, 272 }, 273 } 274 if diff := cmp.Diff(want, d.fd, cmpFileDataOpts...); diff != "" { 275 t.Errorf("Wrong decoded block (-want, +got)\n%s", diff) 276 } 277 } 278 279 func TestReblocking(t *testing.T) { 280 d := newDataTester(t, &block.SplitConfig{Min: 200, Size: 1024, Max: 8192}) 281 rng := rand.New(rand.NewSource(1)) // change to update test data 282 283 const alphabet = "0123456789abcdef" 284 var buf bytes.Buffer 285 for buf.Len() < 4000 { 286 buf.WriteByte(alphabet[rng.Intn(len(alphabet))]) 287 } 288 fileData := buf.String() 289 290 // Write the data in a bunch of small contiguous chunks, and verify that the 291 // result reblocks adjacent chunks. 292 i, nb := 0, 0 293 for i < len(fileData) { 294 end := i + 25 295 if end > len(fileData) { 296 end = len(fileData) 297 } 298 d.writeString(fileData[i:end], int64(i)) 299 i = end 300 nb++ 301 } 302 303 check := func(want ...int64) { 304 var total int64 305 var got []int64 306 for _, ext := range d.fd.extents { 307 for _, blk := range ext.blocks { 308 total += blk.bytes 309 got = append(got, blk.bytes) 310 } 311 } 312 if diff := cmp.Diff(want, got); diff != "" { 313 t.Errorf("Wrong block sizes (-want, +got)\n%s", diff) 314 } 315 if int(total) != len(fileData) { 316 t.Errorf("Wrong total size: got %d, want %d", total, len(fileData)) 317 } 318 } 319 check(481, 2329, 413, 255, 522) // manually checked 320 321 // Now exactly overwrite one block, and verify that it updated its neighbor. 322 // Note that the tail of the original blocks should not be modified. 323 // 324 // Before: xxxx xxxxxxxxxxxxxxxxxxxxxxx xxxx xx xxxxx 325 // After: AAAAAAAAAAAAAAAAAAAAxxxxxxxx xxxx xx xxxxx 326 // Write: ^^^^^^^^^^^^^^^^^^^^ \----\--\---- unchanged 327 // 328 d.writeString(strings.Repeat("AAAA", 500), 0) 329 check(2810, 413, 255, 522) // manually checked; note tail is stable 330 331 t.Log("Block manifest:") 332 d.fd.blocks(func(size int64, key string) { 333 t.Logf("%-4d\t%x", size, []byte(key)) 334 }) 335 } 336 337 type testInput struct { 338 template string 339 splits mapset.Set[int] 340 input string 341 pos int 342 } 343 344 func TestNewFileData(t *testing.T) { 345 type extinfo struct { 346 Base, Bytes int64 347 Blocks int 348 } 349 tests := []struct { 350 input string 351 want []extinfo 352 }{ 353 { 354 // |<--------------- 43 bytes ------------------>| |<- 15 bytes -->| 355 // | block 1 ^ block 2 ^ block 3 | ...unstored... | block 1 | 356 input: "The first line|The second line|The third line|\x00\x00\x00\x00\x00|The fourth line", 357 want: []extinfo{{0, 43, 3}, {48, 15, 1}}, 358 }, 359 { 360 // sizes: 3 3 3 361 // unstored | | unstored | | unstored | | unstored | 362 input: "\x00\x00\x00foo|\x00\x00\x00bar\x00\x00\x00|baz\x00\x00\x00\x00", 363 want: []extinfo{{3, 3, 1}, {9, 3, 1}, {15, 3, 1}}, 364 }, 365 } 366 for i, test := range tests { 367 t.Run(strconv.Itoa(i+1), func(t *testing.T) { 368 ti := newTestInput(test.input) 369 s := block.NewSplitter(ti.reader(), &block.SplitConfig{ 370 Hasher: ti, Min: 5, Max: 100, Size: 16, 371 }) 372 373 // Generate a new data index from the input. We don't actually store 374 // any data here, just generate some plausible keys as if we did. 375 fd, err := newFileData(s, func(data []byte) (string, error) { 376 t.Logf("Block: %q", string(data)) 377 h := sha1.New() 378 h.Write(data) 379 return string(h.Sum(nil)), nil 380 }) 381 if err != nil { 382 t.Fatalf("newFileData failed: %v", err) 383 } 384 385 // Verify that the construction preserved all the input. 386 t.Logf("Input size: %d, total bytes: %d", ti.inputLen(), fd.totalBytes) 387 if want := ti.inputLen(); fd.totalBytes != int64(want) { 388 t.Errorf("TotalBytes: got %d, want %d", fd.totalBytes, want) 389 } 390 391 // Verify that the created extents match the template. 392 var got []extinfo 393 for i, ext := range fd.extents { 394 t.Logf("Extent %d base %d bytes %d", i+1, ext.base, ext.bytes) 395 got = append(got, extinfo{ 396 Base: ext.base, 397 Bytes: ext.bytes, 398 Blocks: len(ext.blocks), 399 }) 400 for j, b := range ext.blocks { 401 t.Logf("- E%d block %d: %d bytes, key=%x", i+1, j+1, b.bytes, b.key) 402 } 403 } 404 if diff := cmp.Diff(test.want, got); diff != "" { 405 t.Errorf("Wrong extents: (-want, +got):\n%s", diff) 406 } 407 }) 408 } 409 } 410 411 func TestBlockReader(t *testing.T) { 412 const message = "you are not the person we thought you were" 413 414 input := bytes.SplitAfter([]byte(message), []byte(" ")) 415 r := newBlockReader(input) 416 var data []byte 417 buf := make([]byte, 8) 418 for { 419 nr, err := r.Read(buf) 420 data = append(data, buf[:nr]...) 421 if err == io.EOF { 422 break 423 } else if err != nil { 424 t.Fatalf("Read failed: nr=%d, err=%v", nr, err) 425 } 426 } 427 got := string(data) 428 if got != message { 429 t.Errorf("Block reader:\n- got %q\n- want %q", got, message) 430 } 431 } 432 433 func hashOf(s string) string { 434 h := blake2b.Sum256([]byte(s)) 435 return string(h[:]) 436 } 437 438 type dataTester struct { 439 t *testing.T 440 ctx context.Context 441 cas blob.CAS 442 fd *fileData 443 } 444 445 func newDataTester(t *testing.T, sc *block.SplitConfig) *dataTester { 446 return &dataTester{ 447 t: t, 448 ctx: t.Context(), 449 cas: blob.CASFromKV(memstore.NewKV()), 450 fd: &fileData{sc: sc}, 451 } 452 } 453 454 func (d *dataTester) writeString(s string, at int64) { 455 d.t.Helper() 456 nw, err := d.fd.writeAt(d.ctx, d.cas, []byte(s), at) 457 d.t.Logf("Write %q at offset %d (%d, %v)", s, at, nw, err) 458 if err != nil { 459 d.t.Fatalf("writeAt(ctx, %q, %d): got (%d, %v), unexpected error", s, at, nw, err) 460 } else if nw != len(s) { 461 d.t.Errorf("writeAt(ctx, %q, %d): got %d, want %d", s, at, nw, len(s)) 462 } 463 } 464 465 func (d *dataTester) checkString(at, nb int64, want string) { 466 d.t.Helper() 467 buf := make([]byte, nb) 468 nr, err := d.fd.readAt(d.ctx, d.cas, buf, at) 469 d.t.Logf("Read %d from offset %d (%d, %v)", nb, at, nr, err) 470 if err != nil && err != io.EOF { 471 d.t.Fatalf("readAt(ctx, #[%d], %d): got (%d, %v), unexpected error", nb, at, nr, err) 472 } else if got := string(buf[:nr]); got != want { 473 d.t.Errorf("readAt(ctx, #[%d], %d): got %q, want %q", nb, at, got, want) 474 } 475 } 476 477 func (d *dataTester) truncate(at int64) { 478 d.t.Helper() 479 err := d.fd.truncate(d.ctx, d.cas, at) 480 d.t.Logf("truncate(ctx, %d) %v", at, err) 481 if err != nil { 482 d.t.Fatalf("truncate(ctx, %d): unexpected error: %v", at, err) 483 } 484 } 485 486 func newTestInput(template string) *testInput { 487 parts := strings.Split(template, "|") 488 splits := mapset.New[int]() 489 pos := 0 490 for _, p := range parts { 491 pos += len(p) 492 splits.Add(pos) 493 pos++ 494 } 495 return &testInput{ 496 template: template, 497 input: strings.Join(parts, ""), 498 splits: splits, 499 } 500 } 501 502 func (ti *testInput) reader() *strings.Reader { return strings.NewReader(ti.input) } 503 func (ti *testInput) inputLen() int { return len(ti.input) } 504 func (ti *testInput) inc() { ti.pos++ } 505 506 func (ti *testInput) Hash() block.Hash { return ti } 507 func (ti *testInput) Update(b byte) uint64 { 508 defer ti.inc() 509 if ti.splits.Has(ti.pos) { 510 return 1 511 } 512 return 2 513 }