github.com/apache/arrow/go/v14@v14.0.2/parquet/compress/compress_test.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package compress_test

import (
	"bytes"
	"io"
	"math/rand"
	"testing"

	"github.com/apache/arrow/go/v14/parquet/compress"
	"github.com/stretchr/testify/assert"
)

const (
	RandomDataSize       = 3 * 1024 * 1024
	CompressibleDataSize = 8 * 1024 * 1024
)

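// makeRandomData returns size bytes of deterministic pseudo-random data
// (fixed seed, so runs are reproducible).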
func makeRandomData(size int) []byte {
	ret := make([]byte, size)
	r := rand.New(rand.NewSource(1234))
	r.Read(ret)
	return ret
}

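// makeCompressibleData returns size bytes of highly repetitive text, built by
// copying a short base string and then repeatedly doubling it in place.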
func makeCompressibleData(size int) []byte {
	const base = "Apache Arrow is a cross-language development platform for in-memory data"

	data := make([]byte, size)
	n := copy(data, base)
	for i := n; i < len(data); i *= 2 {
		copy(data[i:], data[:i])
	}
	return data
}

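// TestErrorForUnimplemented verifies that GetCodec returns an error for
// codecs that have no implementation here (LZO and LZ4).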
func TestErrorForUnimplemented(t *testing.T) {
	_, err := compress.GetCodec(compress.Codecs.Lzo)
	assert.Error(t, err)

	_, err = compress.GetCodec(compress.Codecs.Lz4)
	assert.Error(t, err)
}

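// TestCompressDataOneShot round-trips compressible data through each
// implemented codec with the one-shot Encode/Decode APIs, checking that the
// caller-provided buffers are used in place and the payload is unchanged.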
func TestCompressDataOneShot(t *testing.T) {
	tests := []struct {
		c compress.Compression
	}{
		{compress.Codecs.Uncompressed},
		{compress.Codecs.Snappy},
		{compress.Codecs.Gzip},
		{compress.Codecs.Brotli},
		{compress.Codecs.Zstd},
		// {compress.Codecs.Lzo},
		// {compress.Codecs.Lz4},
	}

	for _, tt := range tests {
		t.Run(tt.c.String(), func(t *testing.T) {
			codec, err := compress.GetCodec(tt.c)
			assert.NoError(t, err)
			data := makeCompressibleData(CompressibleDataSize)

			buf := make([]byte, codec.CompressBound(int64(len(data))))
			compressed := codec.Encode(buf, data)
			assert.Same(t, &buf[0], &compressed[0])

			out := make([]byte, len(data))
			uncompressed := codec.Decode(out, compressed)
			assert.Same(t, &out[0], &uncompressed[0])

			assert.Exactly(t, data, uncompressed)
		})
	}
}

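// TestCompressReaderWriter round-trips random data through each codec's
// streaming Writer and Reader, writing in small chunks to exercise buffering.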
func TestCompressReaderWriter(t *testing.T) {
	tests := []struct {
		c compress.Compression
	}{
		{compress.Codecs.Uncompressed},
		{compress.Codecs.Snappy},
		{compress.Codecs.Gzip},
		{compress.Codecs.Brotli},
		{compress.Codecs.Zstd},
		// {compress.Codecs.Lzo},
		// {compress.Codecs.Lz4},
	}

	for _, tt := range tests {
		t.Run(tt.c.String(), func(t *testing.T) {
			var buf bytes.Buffer
			codec, err := compress.GetCodec(tt.c)
			assert.NoError(t, err)
			data := makeRandomData(RandomDataSize)

			wr := codec.NewWriter(&buf)

			const chunkSize = 1111
			input := data
			for len(input) > 0 {
				var (
					n   int
					err error
				)
				if len(input) > chunkSize {
					n, err = wr.Write(input[:chunkSize])
				} else {
					n, err = wr.Write(input)
				}

				assert.NoError(t, err)
				input = input[n:]
			}
			// Close flushes any buffered compressed data; check its error too.
			assert.NoError(t, wr.Close())

			rdr := codec.NewReader(&buf)
			out, err := io.ReadAll(rdr)
			assert.NoError(t, err)
			assert.Exactly(t, data, out)
		})
	}
}
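
// BenchmarkCodecRoundTrip is a small illustrative sketch, not part of the
// upstream suite. It reuses only the Codec API already exercised above
// (GetCodec, CompressBound, Encode, Decode) to time one-shot round trips;
// the codec list and data size are assumptions chosen for the example.
func BenchmarkCodecRoundTrip(b *testing.B) {
	for _, c := range []compress.Compression{compress.Codecs.Snappy, compress.Codecs.Zstd} {
		b.Run(c.String(), func(b *testing.B) {
			codec, err := compress.GetCodec(c)
			if err != nil {
				b.Fatal(err)
			}
			data := makeCompressibleData(CompressibleDataSize)

			// Reuse output buffers across iterations, mirroring the one-shot test above.
			buf := make([]byte, codec.CompressBound(int64(len(data))))
			out := make([]byte, len(data))

			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				compressed := codec.Encode(buf, data)
				out = codec.Decode(out, compressed)
			}
		})
	}
}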