github.com/grailbio/base@v0.0.11/recordio/transformer_test.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package recordio_test 6 7 import ( 8 "bytes" 9 "testing" 10 11 "github.com/grailbio/base/recordio" 12 "github.com/grailbio/base/recordio/recordioflate" 13 "github.com/grailbio/base/recordio/recordiozstd" 14 "github.com/grailbio/testutil/assert" 15 ) 16 17 // Produce a recordio using transformer "name". Returns the ratio between the 18 // encoded size and input size. For a compressing transformer, the ratio should 19 // be ⋘ 1. 20 func transformerTest(t *testing.T, name string) float64 { 21 buf := &bytes.Buffer{} 22 wr := recordio.NewWriter(buf, recordio.WriterOpts{ 23 Transformers: []string{name}, 24 }) 25 // Write lots of compressible data. 26 const itemSize = 16 << 8 27 const nRecs = 300 28 for i := 0; i < nRecs; i++ { 29 data := make([]byte, itemSize) 30 for j := range data { 31 data[j] = 'A' + byte(i) 32 } 33 wr.Append(data) 34 } 35 assert.NoError(t, wr.Finish()) 36 37 // Verify the data 38 sc := recordio.NewScanner(bytes.NewReader(buf.Bytes()), recordio.ScannerOpts{}) 39 for i := 0; i < nRecs; i++ { 40 assert.True(t, sc.Scan(), "err: %v", sc.Err()) 41 data := sc.Get().([]byte) 42 assert.EQ(t, len(data), itemSize) 43 for j := range data { 44 assert.EQ(t, data[j], byte('A'+i)) 45 } 46 } 47 assert.False(t, sc.Scan()) 48 assert.NoError(t, sc.Err()) 49 return float64(len(buf.Bytes())) / float64(nRecs*itemSize) 50 } 51 52 func TestZstd(t *testing.T) { 53 recordiozstd.Init() 54 ratio := transformerTest(t, recordiozstd.Name) 55 assert.LT(t, ratio, 0.2) 56 } 57 58 func TestFlate(t *testing.T) { 59 recordioflate.Init() 60 ratio := transformerTest(t, recordioflate.Name) 61 assert.LT(t, ratio, 0.2) 62 }