github.com/creachadair/ffs@v0.17.3/block/splitter_test.go

github.com/creachadair/ffs@v0.17.3/block/splitter_test.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package block_test
    16  
    17  import (
    18  	"bytes"
    19  	"io"
    20  	"math/rand"
    21  	"reflect"
    22  	"strings"
    23  	"testing"
    24  
    25  	"github.com/creachadair/ffs/block"
    26  )
    27  
    28  // burstyReader implements io.Reader, returning chunks from r whose size is
    29  // bounded above by the specified byte lengths, to simulate a reader that does
    30  // not always deliver all that was requested.
    31  type burstyReader struct {
    32  	r   io.Reader
    33  	len []int
    34  	pos int
    35  }
    36  
    37  func (b *burstyReader) Read(buf []byte) (int, error) {
    38  	cap := len(buf)
    39  	if len(b.len) > b.pos {
    40  		if n := b.len[b.pos]; n < cap {
    41  			cap = b.len[b.pos]
    42  		}
    43  		b.pos = (b.pos + 1) % len(b.len)
    44  	}
    45  	return b.r.Read(buf[:cap])
    46  }
    47  
    48  func newBurstyReader(s string, sizes ...int) io.Reader {
    49  	return &burstyReader{strings.NewReader(s), sizes, 0}
    50  }
    51  
    52  // dummyHash is a mock Hash implementation used for testing a block.Splitter.
    53  // It returns a fixed value for all updates except a designated value.
    54  type dummyHash struct {
    55  	magic byte
    56  	hash  uint64
    57  	size  int
    58  }
    59  
    60  func (d dummyHash) Hash() block.Hash { return d }
    61  
    62  func (d dummyHash) Update(in byte) uint64 {
    63  	if in == d.magic {
    64  		return 1
    65  	}
    66  	return d.hash
    67  }
    68  
    69  func TestSplitterMin(t *testing.T) {
    70  	const minBytes = 10
    71  	d := dummyHash{
    72  		magic: '|',
    73  		hash:  12345,
    74  		size:  1,
    75  	}
    76  	r := strings.NewReader("abc|def|ghi|jkl|mno")
    77  	s := block.NewSplitter(r, &block.SplitConfig{
    78  		Hasher: d,
    79  		Min:    minBytes,
    80  	})
    81  	b, err := s.Next()
    82  	if err != nil {
    83  		t.Fatal(err)
    84  	}
    85  	if len(b) < minBytes {
    86  		t.Errorf("len(b): got %d, want at least %d", len(b), minBytes)
    87  	}
    88  	t.Logf("b=%q", string(b))
    89  }
    90  
    91  func TestSplitterMax(t *testing.T) {
    92  	const maxBytes = 10
    93  	d := dummyHash{
    94  		hash: 12345,
    95  		size: 1,
    96  	}
    97  	r := strings.NewReader("abc|def|ghi|jkl|mno")
    98  	s := block.NewSplitter(r, &block.SplitConfig{
    99  		Hasher: d,
   100  		Max:    maxBytes,
   101  	})
   102  	b, err := s.Next()
   103  	if err != nil {
   104  		t.Fatal(err)
   105  	}
   106  	if len(b) > maxBytes {
   107  		t.Errorf("len(b): got %d, want at most %d", len(b), maxBytes)
   108  	}
   109  	t.Logf("b=%q", string(b))
   110  }
   111  
   112  func TestSplitterBlocks(t *testing.T) {
   113  	tests := []struct {
   114  		input    string
   115  		min, max int
   116  		blocks   []string
   117  	}{
   118  		// In these test cases, any "|" in the input triggers a hash cut.  This
   119  		// permits us to verify the various corner cases of when a cut occurs
   120  		// vs. the length constraints.
   121  		{"", 5, 15, nil},
   122  		{"abc", 5, 15, []string{"abc"}},
   123  		{"|", 0, 15, []string{"|"}},
   124  		{"x||y", 1, 15, []string{"x", "|", "|y"}},
   125  		{"|||x", 1, 5, []string{"|", "|", "|x"}},
   126  		{"a|bc|defg|hijklmno|pqrst", 2, 8, []string{"a|bc", "|defg", "|hijklmn", "o|pqrst"}},
   127  		{"abcdefgh|ijklmnop|||q", 5, 100, []string{"abcdefgh", "|ijklmnop", "|||q"}},
   128  		{"a|b|c|d|e|", 1, 2, []string{"a", "|b", "|c", "|d", "|e", "|"}},
   129  		{"abcdefghijk", 4, 4, []string{"abcd", "efgh", "ijk"}},
   130  	}
   131  	d := dummyHash{
   132  		magic: '|',
   133  		hash:  12345,
   134  		size:  5,
   135  	}
   136  	for _, test := range tests {
   137  		r := newBurstyReader(test.input, 3, 5, 1, 4, 17, 20)
   138  		s := block.NewSplitter(r, &block.SplitConfig{
   139  			Hasher: d,
   140  			Min:    test.min,
   141  			Max:    test.max,
   142  		})
   143  		var bs []string
   144  		if err := s.Split(func(b []byte) error {
   145  			bs = append(bs, string(b))
   146  			return nil
   147  		}); err != nil {
   148  			t.Fatal(err)
   149  		}
   150  		if !reflect.DeepEqual(bs, test.blocks) {
   151  			t.Errorf("split %q: got %+q, want %+q", test.input, bs, test.blocks)
   152  		}
   153  	}
   154  }
   155  
   156  func TestLongValue(t *testing.T) {
   157  	rng := rand.New(rand.NewSource(1)) // change to update test data
   158  
   159  	const alphabet = "abcdefghijklmnopqrstuvwxyz 0123456789"
   160  	const inputLen = 32000
   161  	var buf bytes.Buffer
   162  	for buf.Len() < inputLen {
   163  		buf.WriteByte(alphabet[rng.Intn(len(alphabet))])
   164  	}
   165  	cfg := &block.SplitConfig{
   166  		Min:  200,
   167  		Size: 800,
   168  		Max:  20000,
   169  	}
   170  	s := block.NewSplitter(&buf, cfg)
   171  	var total int
   172  	var sizes []int
   173  	if err := s.Split(func(blk []byte) error {
   174  		total += len(blk)
   175  		sizes = append(sizes, len(blk))
   176  		if len(blk) < cfg.Min {
   177  			t.Errorf("Block too short: %d bytes < %d", len(blk), cfg.Min)
   178  
   179  			// N.B. This could legitimately happen at end of input.
   180  		} else if len(blk) > cfg.Max {
   181  			t.Errorf("Block too long: %d bytes > %d", len(blk), cfg.Max)
   182  		}
   183  		return nil
   184  	}); err != nil {
   185  		t.Errorf("Split failed: %v", err)
   186  	}
   187  	t.Logf("Split: %d blocks, %d bytes total :: %+v", len(sizes), total, sizes)
   188  	if total != inputLen {
   189  		t.Errorf("Total size of blocks: got %d, want %d", total, inputLen)
   190  	}
   191  }