github.com/apache/arrow/go/v16@v16.1.0/arrow/ipc/writer_test.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ipc

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"math"
	"strings"
	"testing"

	"github.com/apache/arrow/go/v16/arrow"
	"github.com/apache/arrow/go/v16/arrow/array"
	"github.com/apache/arrow/go/v16/arrow/bitutil"
	"github.com/apache/arrow/go/v16/arrow/internal/flatbuf"
	"github.com/apache/arrow/go/v16/arrow/memory"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// reproducer from ARROW-13529: writing a sliced record through the stream
// writer must account for the slice offset and must not panic.
func TestSliceAndWrite(t *testing.T) {
	alloc := memory.NewGoAllocator()
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "s", Type: arrow.BinaryTypes.String},
	}, nil)

	b := array.NewRecordBuilder(alloc, schema)
	defer b.Release()

	b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	sliceAndWrite := func(rec arrow.Record, schema *arrow.Schema) {
		slice := rec.NewSlice(1, 2)
		defer slice.Release()

		fmt.Println(slice.Columns()[0].(*array.String).Value(0))

		var buf bytes.Buffer
		w := NewWriter(&buf, WithSchema(schema))
		w.Write(slice)
		w.Close()
	}

	assert.NotPanics(t, func() {
		for i := 0; i < 2; i++ {
			sliceAndWrite(rec, schema)
		}
	})
}
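
// A hedged companion sketch, not part of the upstream file: beyond not
// panicking, the single row covered by the slice should survive a round trip
// through the stream writer and reader. Assumes the package's stream Reader
// API (NewReader, Next, Record) behaves as it does in the reader tests.
func TestSliceRoundTripSketch(t *testing.T) {
	alloc := memory.NewGoAllocator()
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "s", Type: arrow.BinaryTypes.String},
	}, nil)

	b := array.NewRecordBuilder(alloc, schema)
	defer b.Release()

	b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	// the slice covers a single row: "bar"
	slice := rec.NewSlice(1, 2)
	defer slice.Release()

	var buf bytes.Buffer
	w := NewWriter(&buf, WithSchema(schema))
	require.NoError(t, w.Write(slice))
	require.NoError(t, w.Close())

	r, err := NewReader(&buf)
	require.NoError(t, err)
	defer r.Release()

	require.True(t, r.Next())
	got := r.Record()
	assert.EqualValues(t, 1, got.NumRows())
	assert.Equal(t, "bar", got.Column(0).(*array.String).Value(0))
}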

// TestNewTruncatedBitmap exercises newTruncatedBitmap: a nil input bitmap, a
// bitmap that needs no truncation, truncation at a non-zero offset, and
// shrinking an oversized validity buffer.
func TestNewTruncatedBitmap(t *testing.T) {
	alloc := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer alloc.AssertSize(t, 0)

	assert.Nil(t, newTruncatedBitmap(alloc, 0, 0, nil), "input bitmap is null")

	buf := memory.NewBufferBytes(make([]byte, bitutil.BytesForBits(8)))
	defer buf.Release()

	bitutil.SetBit(buf.Bytes(), 0)
	bitutil.SetBit(buf.Bytes(), 2)
	bitutil.SetBit(buf.Bytes(), 4)
	bitutil.SetBit(buf.Bytes(), 6)

	assert.Same(t, buf, newTruncatedBitmap(alloc, 0, 8, buf), "no truncation necessary")

	result := newTruncatedBitmap(alloc, 1, 7, buf)
	defer result.Release()
	for i, exp := range []bool{false, true, false, true, false, true, false} {
		assert.Equal(t, exp, bitutil.BitIsSet(result.Bytes(), i), "truncate for offset")
	}

	buf = memory.NewBufferBytes(make([]byte, 128))
	defer buf.Release()
	bitutil.SetBitsTo(buf.Bytes(), 0, 128*8, true)

	result = newTruncatedBitmap(alloc, 0, 8, buf)
	defer result.Release()
	assert.Equal(t, 64, result.Len(), "truncate to smaller buffer")
	assert.Equal(t, 8, bitutil.CountSetBits(result.Bytes(), 0, 8))
}

// TestGetZeroBasedValueOffsets checks that value offsets are rebased to zero
// and that trailing offsets are trimmed when the array is sliced.
func TestGetZeroBasedValueOffsets(t *testing.T) {
	alloc := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer alloc.AssertSize(t, 0)

	vals := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}
	b := array.NewStringBuilder(alloc)
	defer b.Release()
	b.AppendValues(vals, nil)

	arr := b.NewArray()
	defer arr.Release()

	env := &recordEncoder{mem: alloc}

	// 10 values require 11 int32 offsets, i.e. 44 bytes
	offsets := env.getZeroBasedValueOffsets(arr)
	defer offsets.Release()
	assert.Equal(t, 44, offsets.Len(), "include all offsets if array is not sliced")

	sl := array.NewSlice(arr, 0, 4)
	defer sl.Release()

	// a 4-value slice requires only 5 offsets, i.e. 20 bytes
	offsets = env.getZeroBasedValueOffsets(sl)
	defer offsets.Release()
	assert.Equal(t, 20, offsets.Len(), "trim trailing offsets after slice")
}

// TestWriterCatchPanic ensures a panic raised while encoding a record is
// recovered and surfaced to the caller as an error.
func TestWriterCatchPanic(t *testing.T) {
	alloc := memory.NewGoAllocator()
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "s", Type: arrow.BinaryTypes.String},
	}, nil)

	b := array.NewRecordBuilder(alloc, schema)
	defer b.Release()

	b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	// corrupt the first offset of the string column: setting all 32 bits makes
	// it -1, which triggers a slice-bounds panic inside Write
	offsetBuf := rec.Column(0).Data().Buffers()[1]
	bitutil.SetBitsTo(offsetBuf.Bytes(), 0, 32, true)

	buf := new(bytes.Buffer)

	writer := NewWriter(buf, WithSchema(schema))
	assert.EqualError(t, writer.Write(rec), "arrow/ipc: unknown error while writing: runtime error: slice bounds out of range [-1:]")
}

// TestWriterMemCompression verifies that writing a zstd-compressed record does
// not leak allocations from the checked allocator.
func TestWriterMemCompression(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer mem.AssertSize(t, 0)

	schema := arrow.NewSchema([]arrow.Field{
		{Name: "s", Type: arrow.BinaryTypes.String},
	}, nil)

	b := array.NewRecordBuilder(mem, schema)
	defer b.Release()

	b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	var buf bytes.Buffer
	w := NewWriter(&buf, WithAllocator(mem), WithSchema(schema), WithZstd())
	defer w.Close()

	require.NoError(t, w.Write(rec))
}
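
// A hedged companion sketch, not part of the upstream file: the zstd-compressed
// stream produced as above should also decode back to an equal record. Assumes
// NewReader transparently decompresses and that array.RecordEqual is available,
// as used elsewhere in the Arrow Go test suite.
func TestWriterZstdRoundTripSketch(t *testing.T) {
	mem := memory.NewGoAllocator()

	schema := arrow.NewSchema([]arrow.Field{
		{Name: "s", Type: arrow.BinaryTypes.String},
	}, nil)

	b := array.NewRecordBuilder(mem, schema)
	defer b.Release()

	b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	var buf bytes.Buffer
	w := NewWriter(&buf, WithAllocator(mem), WithSchema(schema), WithZstd())
	require.NoError(t, w.Write(rec))
	require.NoError(t, w.Close())

	r, err := NewReader(&buf, WithAllocator(mem))
	require.NoError(t, err)
	defer r.Release()

	require.True(t, r.Next())
	assert.True(t, array.RecordEqual(rec, r.Record()))
	require.NoError(t, r.Err())
}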

// TestWriteWithCompressionAndMinSavings checks that body buffers are only
// compressed when the achieved space savings meet the minSpaceSavings
// threshold, and that out-of-range thresholds are rejected.
func TestWriteWithCompressionAndMinSavings(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// a small batch that is known to be compressible
	batch, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{
		{Name: "n", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil),
		strings.NewReader(`[
			{"n": 0}, {"n": 1}, {"n": 2}, {"n": 3}, {"n": 4},
			{"n": 5}, {"n": 6}, {"n": 7}, {"n": 8}, {"n": 9}]`))
	require.NoError(t, err)
	defer batch.Release()

	// compressed body buffers are prefixed with their uncompressed length as a
	// little-endian int64 (or -1 if the content was left uncompressed)
	prefixedSize := func(buf *memory.Buffer) int64 {
		if buf.Len() < arrow.Int64SizeBytes {
			return 0
		}
		return int64(binary.LittleEndian.Uint64(buf.Bytes()))
	}
	contentSize := func(buf *memory.Buffer) int64 {
		return int64(buf.Len()) - int64(arrow.Int64SizeBytes)
	}

	for _, codec := range []flatbuf.CompressionType{flatbuf.CompressionTypeLZ4_FRAME, flatbuf.CompressionTypeZSTD} {
		enc := newRecordEncoder(mem, 0, 5, true, codec, 1, nil)
		var payload Payload
		require.NoError(t, enc.encode(&payload, batch))
		assert.Len(t, payload.body, 2)

		// compute the savings when body buffers are compressed unconditionally,
		// and validate that our test batch is indeed compressible
		uncompressedSize, compressedSize := prefixedSize(payload.body[1]), contentSize(payload.body[1])
		assert.Less(t, compressedSize, uncompressedSize)
		assert.Greater(t, compressedSize, int64(0))
		expectedSavings := 1.0 - float64(compressedSize)/float64(uncompressedSize)

		// with minSpaceSavings set exactly to the achieved savings, the buffer
		// should still be written compressed
		compressEncoder := newRecordEncoder(mem, 0, 5, true, codec, 1, &expectedSavings)
		payload.Release()
		payload.body = payload.body[:0]
		require.NoError(t, compressEncoder.encode(&payload, batch))
		assert.Len(t, payload.body, 2)
		assert.Equal(t, uncompressedSize, prefixedSize(payload.body[1]))
		assert.Equal(t, compressedSize, contentSize(payload.body[1]))

		payload.Release()
		payload.body = payload.body[:0]
		// slightly bump the threshold: the body buffer should now be prefixed
		// with -1 and its content left uncompressed
		minSavings := math.Nextafter(expectedSavings, 1.0)
		compressEncoder.minSpaceSavings = &minSavings
		require.NoError(t, compressEncoder.encode(&payload, batch))
		assert.Len(t, payload.body, 2)
		assert.EqualValues(t, -1, prefixedSize(payload.body[1]))
		assert.Equal(t, uncompressedSize, contentSize(payload.body[1]))
		payload.Release()
		payload.body = payload.body[:0]

		// thresholds outside [0, 1] must be rejected
		for _, outOfRange := range []float64{math.Nextafter(1.0, 2.0), math.Nextafter(0, -1)} {
			compressEncoder.minSpaceSavings = &outOfRange
			err := compressEncoder.encode(&payload, batch)
			assert.ErrorIs(t, err, arrow.ErrInvalid)
			assert.ErrorContains(t, err, "minSpaceSavings not in range [0,1]")
		}
	}
}