github.com/apache/arrow/go/v14@v14.0.1/parquet/compress/compress_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package compress_test 18 19 import ( 20 "bytes" 21 "io" 22 "math/rand" 23 "testing" 24 25 "github.com/apache/arrow/go/v14/parquet/compress" 26 "github.com/stretchr/testify/assert" 27 ) 28 29 const ( 30 RandomDataSize = 3 * 1024 * 1024 31 CompressibleDataSize = 8 * 1024 * 1024 32 ) 33 34 func makeRandomData(size int) []byte { 35 ret := make([]byte, size) 36 r := rand.New(rand.NewSource(1234)) 37 r.Read(ret) 38 return ret 39 } 40 41 func makeCompressibleData(size int) []byte { 42 const base = "Apache Arrow is a cross-language development platform for in-memory data" 43 44 data := make([]byte, size) 45 n := copy(data, base) 46 for i := n; i < len(data); i *= 2 { 47 copy(data[i:], data[:i]) 48 } 49 return data 50 } 51 52 func TestErrorForUnimplemented(t *testing.T) { 53 _, err := compress.GetCodec(compress.Codecs.Lzo) 54 assert.Error(t, err) 55 56 _, err = compress.GetCodec(compress.Codecs.Lz4) 57 assert.Error(t, err) 58 } 59 60 func TestCompressDataOneShot(t *testing.T) { 61 tests := []struct { 62 c compress.Compression 63 }{ 64 {compress.Codecs.Uncompressed}, 65 {compress.Codecs.Snappy}, 66 {compress.Codecs.Gzip}, 67 {compress.Codecs.Brotli}, 68 {compress.Codecs.Zstd}, 69 // {compress.Codecs.Lzo}, 70 // {compress.Codecs.Lz4}, 71 } 72 73 for _, tt := range tests { 74 t.Run(tt.c.String(), func(t *testing.T) { 75 codec, err := compress.GetCodec(tt.c) 76 assert.NoError(t, err) 77 data := makeCompressibleData(CompressibleDataSize) 78 79 buf := make([]byte, codec.CompressBound(int64(len(data)))) 80 compressed := codec.Encode(buf, data) 81 assert.Same(t, &buf[0], &compressed[0]) 82 83 out := make([]byte, len(data)) 84 uncompressed := codec.Decode(out, compressed) 85 assert.Same(t, &out[0], &uncompressed[0]) 86 87 assert.Exactly(t, data, uncompressed) 88 }) 89 } 90 } 91 92 func TestCompressReaderWriter(t *testing.T) { 93 tests := []struct { 94 c compress.Compression 95 }{ 96 {compress.Codecs.Uncompressed}, 97 {compress.Codecs.Snappy}, 98 {compress.Codecs.Gzip}, 99 {compress.Codecs.Brotli}, 100 {compress.Codecs.Zstd}, 101 // {compress.Codecs.Lzo}, 102 // {compress.Codecs.Lz4}, 103 } 104 105 for _, tt := range tests { 106 t.Run(tt.c.String(), func(t *testing.T) { 107 var buf bytes.Buffer 108 codec, err := compress.GetCodec(tt.c) 109 assert.NoError(t, err) 110 data := makeRandomData(RandomDataSize) 111 112 wr := codec.NewWriter(&buf) 113 114 const chunkSize = 1111 115 input := data 116 for len(input) > 0 { 117 var ( 118 n int 119 err error 120 ) 121 if len(input) > chunkSize { 122 n, err = wr.Write(input[:chunkSize]) 123 } else { 124 n, err = wr.Write(input) 125 } 126 127 assert.NoError(t, err) 128 input = input[n:] 129 } 130 wr.Close() 131 132 rdr := codec.NewReader(&buf) 133 out, err := io.ReadAll(rdr) 134 assert.NoError(t, err) 135 assert.Exactly(t, data, out) 136 }) 137 } 138 }