github.com/creachadair/ffs@v0.17.3/block/splitter_test.go (about) 1 // Copyright 2019 Michael J. Fromberger. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package block_test 16 17 import ( 18 "bytes" 19 "io" 20 "math/rand" 21 "reflect" 22 "strings" 23 "testing" 24 25 "github.com/creachadair/ffs/block" 26 ) 27 28 // burstyReader implements io.Reader, returning chunks from r whose size is 29 // bounded above by the specified byte lengths, to simulate a reader that does 30 // not always deliver all that was requested. 31 type burstyReader struct { 32 r io.Reader 33 len []int 34 pos int 35 } 36 37 func (b *burstyReader) Read(buf []byte) (int, error) { 38 cap := len(buf) 39 if len(b.len) > b.pos { 40 if n := b.len[b.pos]; n < cap { 41 cap = b.len[b.pos] 42 } 43 b.pos = (b.pos + 1) % len(b.len) 44 } 45 return b.r.Read(buf[:cap]) 46 } 47 48 func newBurstyReader(s string, sizes ...int) io.Reader { 49 return &burstyReader{strings.NewReader(s), sizes, 0} 50 } 51 52 // dummyHash is a mock Hash implementation used for testing a block.Splitter. 53 // It returns a fixed value for all updates except a designated value. 54 type dummyHash struct { 55 magic byte 56 hash uint64 57 size int 58 } 59 60 func (d dummyHash) Hash() block.Hash { return d } 61 62 func (d dummyHash) Update(in byte) uint64 { 63 if in == d.magic { 64 return 1 65 } 66 return d.hash 67 } 68 69 func TestSplitterMin(t *testing.T) { 70 const minBytes = 10 71 d := dummyHash{ 72 magic: '|', 73 hash: 12345, 74 size: 1, 75 } 76 r := strings.NewReader("abc|def|ghi|jkl|mno") 77 s := block.NewSplitter(r, &block.SplitConfig{ 78 Hasher: d, 79 Min: minBytes, 80 }) 81 b, err := s.Next() 82 if err != nil { 83 t.Fatal(err) 84 } 85 if len(b) < minBytes { 86 t.Errorf("len(b): got %d, want at least %d", len(b), minBytes) 87 } 88 t.Logf("b=%q", string(b)) 89 } 90 91 func TestSplitterMax(t *testing.T) { 92 const maxBytes = 10 93 d := dummyHash{ 94 hash: 12345, 95 size: 1, 96 } 97 r := strings.NewReader("abc|def|ghi|jkl|mno") 98 s := block.NewSplitter(r, &block.SplitConfig{ 99 Hasher: d, 100 Max: maxBytes, 101 }) 102 b, err := s.Next() 103 if err != nil { 104 t.Fatal(err) 105 } 106 if len(b) > maxBytes { 107 t.Errorf("len(b): got %d, want at most %d", len(b), maxBytes) 108 } 109 t.Logf("b=%q", string(b)) 110 } 111 112 func TestSplitterBlocks(t *testing.T) { 113 tests := []struct { 114 input string 115 min, max int 116 blocks []string 117 }{ 118 // In these test cases, any "|" in the input triggers a hash cut. This 119 // permits us to verify the various corner cases of when a cut occurs 120 // vs. the length constraints. 121 {"", 5, 15, nil}, 122 {"abc", 5, 15, []string{"abc"}}, 123 {"|", 0, 15, []string{"|"}}, 124 {"x||y", 1, 15, []string{"x", "|", "|y"}}, 125 {"|||x", 1, 5, []string{"|", "|", "|x"}}, 126 {"a|bc|defg|hijklmno|pqrst", 2, 8, []string{"a|bc", "|defg", "|hijklmn", "o|pqrst"}}, 127 {"abcdefgh|ijklmnop|||q", 5, 100, []string{"abcdefgh", "|ijklmnop", "|||q"}}, 128 {"a|b|c|d|e|", 1, 2, []string{"a", "|b", "|c", "|d", "|e", "|"}}, 129 {"abcdefghijk", 4, 4, []string{"abcd", "efgh", "ijk"}}, 130 } 131 d := dummyHash{ 132 magic: '|', 133 hash: 12345, 134 size: 5, 135 } 136 for _, test := range tests { 137 r := newBurstyReader(test.input, 3, 5, 1, 4, 17, 20) 138 s := block.NewSplitter(r, &block.SplitConfig{ 139 Hasher: d, 140 Min: test.min, 141 Max: test.max, 142 }) 143 var bs []string 144 if err := s.Split(func(b []byte) error { 145 bs = append(bs, string(b)) 146 return nil 147 }); err != nil { 148 t.Fatal(err) 149 } 150 if !reflect.DeepEqual(bs, test.blocks) { 151 t.Errorf("split %q: got %+q, want %+q", test.input, bs, test.blocks) 152 } 153 } 154 } 155 156 func TestLongValue(t *testing.T) { 157 rng := rand.New(rand.NewSource(1)) // change to update test data 158 159 const alphabet = "abcdefghijklmnopqrstuvwxyz 0123456789" 160 const inputLen = 32000 161 var buf bytes.Buffer 162 for buf.Len() < inputLen { 163 buf.WriteByte(alphabet[rng.Intn(len(alphabet))]) 164 } 165 cfg := &block.SplitConfig{ 166 Min: 200, 167 Size: 800, 168 Max: 20000, 169 } 170 s := block.NewSplitter(&buf, cfg) 171 var total int 172 var sizes []int 173 if err := s.Split(func(blk []byte) error { 174 total += len(blk) 175 sizes = append(sizes, len(blk)) 176 if len(blk) < cfg.Min { 177 t.Errorf("Block too short: %d bytes < %d", len(blk), cfg.Min) 178 179 // N.B. This could legitimately happen at end of input. 180 } else if len(blk) > cfg.Max { 181 t.Errorf("Block too long: %d bytes > %d", len(blk), cfg.Max) 182 } 183 return nil 184 }); err != nil { 185 t.Errorf("Split failed: %v", err) 186 } 187 t.Logf("Split: %d blocks, %d bytes total :: %+v", len(sizes), total, sizes) 188 if total != inputLen { 189 t.Errorf("Total size of blocks: got %d, want %d", total, inputLen) 190 } 191 }