github.com/apache/arrow/go/v16@v16.1.0/arrow/ipc/writer_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package ipc 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "fmt" 23 "math" 24 "strings" 25 "testing" 26 27 "github.com/apache/arrow/go/v16/arrow" 28 "github.com/apache/arrow/go/v16/arrow/array" 29 "github.com/apache/arrow/go/v16/arrow/bitutil" 30 "github.com/apache/arrow/go/v16/arrow/internal/flatbuf" 31 "github.com/apache/arrow/go/v16/arrow/memory" 32 "github.com/stretchr/testify/assert" 33 "github.com/stretchr/testify/require" 34 ) 35 36 // reproducer from ARROW-13529 37 func TestSliceAndWrite(t *testing.T) { 38 alloc := memory.NewGoAllocator() 39 schema := arrow.NewSchema([]arrow.Field{ 40 {Name: "s", Type: arrow.BinaryTypes.String}, 41 }, nil) 42 43 b := array.NewRecordBuilder(alloc, schema) 44 defer b.Release() 45 46 b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil) 47 rec := b.NewRecord() 48 defer rec.Release() 49 50 sliceAndWrite := func(rec arrow.Record, schema *arrow.Schema) { 51 slice := rec.NewSlice(1, 2) 52 defer slice.Release() 53 54 fmt.Println(slice.Columns()[0].(*array.String).Value(0)) 55 56 var buf bytes.Buffer 57 w := NewWriter(&buf, WithSchema(schema)) 58 w.Write(slice) 59 w.Close() 60 } 61 62 assert.NotPanics(t, func() { 63 for i := 0; i < 2; i++ { 64 sliceAndWrite(rec, schema) 65 } 66 }) 67 } 68 69 func TestNewTruncatedBitmap(t *testing.T) { 70 alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) 71 defer alloc.AssertSize(t, 0) 72 73 assert.Nil(t, newTruncatedBitmap(alloc, 0, 0, nil), "input bitmap is null") 74 75 buf := memory.NewBufferBytes(make([]byte, bitutil.BytesForBits(8))) 76 defer buf.Release() 77 78 bitutil.SetBit(buf.Bytes(), 0) 79 bitutil.SetBit(buf.Bytes(), 2) 80 bitutil.SetBit(buf.Bytes(), 4) 81 bitutil.SetBit(buf.Bytes(), 6) 82 83 assert.Same(t, buf, newTruncatedBitmap(alloc, 0, 8, buf), "no truncation necessary") 84 85 result := newTruncatedBitmap(alloc, 1, 7, buf) 86 defer result.Release() 87 for i, exp := range []bool{false, true, false, true, false, true, false} { 88 assert.Equal(t, exp, bitutil.BitIsSet(result.Bytes(), i), "truncate for offset") 89 } 90 91 buf = memory.NewBufferBytes(make([]byte, 128)) 92 defer buf.Release() 93 bitutil.SetBitsTo(buf.Bytes(), 0, 128*8, true) 94 95 result = newTruncatedBitmap(alloc, 0, 8, buf) 96 defer result.Release() 97 assert.Equal(t, 64, result.Len(), "truncate to smaller buffer") 98 assert.Equal(t, 8, bitutil.CountSetBits(result.Bytes(), 0, 8)) 99 } 100 101 func TestGetZeroBasedValueOffsets(t *testing.T) { 102 alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) 103 defer alloc.AssertSize(t, 0) 104 105 vals := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"} 106 b := array.NewStringBuilder(alloc) 107 defer b.Release() 108 b.AppendValues(vals, nil) 109 110 arr := b.NewArray() 111 defer arr.Release() 112 113 env := &recordEncoder{mem: alloc} 114 115 offsets := env.getZeroBasedValueOffsets(arr) 116 defer offsets.Release() 117 assert.Equal(t, 44, offsets.Len(), "include all offsets if array is not sliced") 118 119 sl := array.NewSlice(arr, 0, 4) 120 defer sl.Release() 121 122 offsets = env.getZeroBasedValueOffsets(sl) 123 defer offsets.Release() 124 assert.Equal(t, 20, offsets.Len(), "trim trailing offsets after slice") 125 } 126 127 func TestWriterCatchPanic(t *testing.T) { 128 alloc := memory.NewGoAllocator() 129 schema := arrow.NewSchema([]arrow.Field{ 130 {Name: "s", Type: arrow.BinaryTypes.String}, 131 }, nil) 132 133 b := array.NewRecordBuilder(alloc, schema) 134 defer b.Release() 135 136 b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil) 137 rec := b.NewRecord() 138 defer rec.Release() 139 140 // mess up the first offset for the string column 141 offsetBuf := rec.Column(0).Data().Buffers()[1] 142 bitutil.SetBitsTo(offsetBuf.Bytes(), 0, 32, true) 143 144 buf := new(bytes.Buffer) 145 146 writer := NewWriter(buf, WithSchema(schema)) 147 assert.EqualError(t, writer.Write(rec), "arrow/ipc: unknown error while writing: runtime error: slice bounds out of range [-1:]") 148 } 149 150 func TestWriterMemCompression(t *testing.T) { 151 mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 152 defer mem.AssertSize(t, 0) 153 154 schema := arrow.NewSchema([]arrow.Field{ 155 {Name: "s", Type: arrow.BinaryTypes.String}, 156 }, nil) 157 158 b := array.NewRecordBuilder(mem, schema) 159 defer b.Release() 160 161 b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", "baz"}, nil) 162 rec := b.NewRecord() 163 defer rec.Release() 164 165 var buf bytes.Buffer 166 w := NewWriter(&buf, WithAllocator(mem), WithSchema(schema), WithZstd()) 167 defer w.Close() 168 169 require.NoError(t, w.Write(rec)) 170 } 171 172 func TestWriteWithCompressionAndMinSavings(t *testing.T) { 173 mem := memory.NewCheckedAllocator(memory.DefaultAllocator) 174 defer mem.AssertSize(t, 0) 175 176 // a small batch that is known to be compressible 177 batch, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{ 178 {Name: "n", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil), 179 strings.NewReader(`[ 180 {"n": 0}, {"n": 1}, {"n": 2}, {"n": 3}, {"n": 4}, 181 {"n": 5}, {"n": 6}, {"n": 7}, {"n": 8}, {"n": 9}]`)) 182 require.NoError(t, err) 183 defer batch.Release() 184 185 prefixedSize := func(buf *memory.Buffer) int64 { 186 if buf.Len() < arrow.Int64SizeBytes { 187 return 0 188 } 189 return int64(binary.LittleEndian.Uint64(buf.Bytes())) 190 } 191 contentSize := func(buf *memory.Buffer) int64 { 192 return int64(buf.Len()) - int64(arrow.Int64SizeBytes) 193 } 194 195 for _, codec := range []flatbuf.CompressionType{flatbuf.CompressionTypeLZ4_FRAME, flatbuf.CompressionTypeZSTD} { 196 enc := newRecordEncoder(mem, 0, 5, true, codec, 1, nil) 197 var payload Payload 198 require.NoError(t, enc.encode(&payload, batch)) 199 assert.Len(t, payload.body, 2) 200 201 // compute the savings when body buffers are compressed unconditionally. 202 // We also validate that our test batch is indeed compressible. 203 uncompressedSize, compressedSize := prefixedSize(payload.body[1]), contentSize(payload.body[1]) 204 assert.Less(t, compressedSize, uncompressedSize) 205 assert.Greater(t, compressedSize, int64(0)) 206 expectedSavings := 1.0 - float64(compressedSize)/float64(uncompressedSize) 207 208 compressEncoder := newRecordEncoder(mem, 0, 5, true, codec, 1, &expectedSavings) 209 payload.Release() 210 payload.body = payload.body[:0] 211 require.NoError(t, compressEncoder.encode(&payload, batch)) 212 assert.Len(t, payload.body, 2) 213 assert.Equal(t, uncompressedSize, prefixedSize(payload.body[1])) 214 assert.Equal(t, compressedSize, contentSize(payload.body[1])) 215 216 payload.Release() 217 payload.body = payload.body[:0] 218 // slightly bump the threshold. the body buffer should now be prefixed 219 // with -1 and its content left uncompressed 220 minSavings := math.Nextafter(expectedSavings, 1.0) 221 compressEncoder.minSpaceSavings = &minSavings 222 require.NoError(t, compressEncoder.encode(&payload, batch)) 223 assert.Len(t, payload.body, 2) 224 assert.EqualValues(t, -1, prefixedSize(payload.body[1])) 225 assert.Equal(t, uncompressedSize, contentSize(payload.body[1])) 226 payload.Release() 227 payload.body = payload.body[:0] 228 229 for _, outOfRange := range []float64{math.Nextafter(1.0, 2.0), math.Nextafter(0, -1)} { 230 compressEncoder.minSpaceSavings = &outOfRange 231 err := compressEncoder.encode(&payload, batch) 232 assert.ErrorIs(t, err, arrow.ErrInvalid) 233 assert.ErrorContains(t, err, "minSpaceSavings not in range [0,1]") 234 } 235 } 236 }