github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/testutils/random_arrow.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package testutils 18 19 import ( 20 "github.com/apache/arrow/go/v14/arrow" 21 "github.com/apache/arrow/go/v14/arrow/array" 22 "github.com/apache/arrow/go/v14/arrow/memory" 23 "golang.org/x/exp/rand" 24 ) 25 26 // RandomNonNull generates a random arrow array of the requested type with length size with no nulls. 27 // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal. 28 // 29 // Always uses 0 as the seed with the following min/max restrictions: 30 // int16, uint16, int8, and uint8 will be min 0, max 64 31 // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000 32 // String will all have the value "test-string" 33 // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii 34 // fixed size binary will all be of length 10, random bytes are not limited to ascii 35 // bool will be approximately half false and half true randomly. 36 func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Array { 37 switch dt.ID() { 38 case arrow.FLOAT32: 39 bldr := array.NewFloat32Builder(mem) 40 defer bldr.Release() 41 values := make([]float32, size) 42 FillRandomFloat32(0, values) 43 bldr.AppendValues(values, nil) 44 return bldr.NewArray() 45 case arrow.FLOAT64: 46 bldr := array.NewFloat64Builder(mem) 47 defer bldr.Release() 48 values := make([]float64, size) 49 FillRandomFloat64(0, values) 50 bldr.AppendValues(values, nil) 51 return bldr.NewArray() 52 case arrow.INT64: 53 bldr := array.NewInt64Builder(mem) 54 defer bldr.Release() 55 values := make([]int64, size) 56 FillRandomInt64(0, values) 57 bldr.AppendValues(values, nil) 58 return bldr.NewArray() 59 case arrow.UINT64: 60 bldr := array.NewUint64Builder(mem) 61 defer bldr.Release() 62 values := make([]uint64, size) 63 FillRandomUint64(0, values) 64 bldr.AppendValues(values, nil) 65 return bldr.NewArray() 66 case arrow.INT32: 67 bldr := array.NewInt32Builder(mem) 68 defer bldr.Release() 69 values := make([]int32, size) 70 FillRandomInt32(0, values) 71 bldr.AppendValues(values, nil) 72 return bldr.NewArray() 73 case arrow.UINT32: 74 bldr := array.NewUint32Builder(mem) 75 defer bldr.Release() 76 values := make([]uint32, size) 77 FillRandomUint32(0, values) 78 bldr.AppendValues(values, nil) 79 return bldr.NewArray() 80 case arrow.INT16: 81 bldr := array.NewInt16Builder(mem) 82 defer bldr.Release() 83 values := make([]int16, size) 84 FillRandomInt16(0, 0, 64, values) 85 bldr.AppendValues(values, nil) 86 return bldr.NewArray() 87 case arrow.UINT16: 88 bldr := array.NewUint16Builder(mem) 89 defer bldr.Release() 90 values := make([]uint16, size) 91 FillRandomUint16(0, 0, 64, values) 92 bldr.AppendValues(values, nil) 93 return bldr.NewArray() 94 case arrow.INT8: 95 bldr := array.NewInt8Builder(mem) 96 defer bldr.Release() 97 values := make([]int8, size) 98 FillRandomInt8(0, 0, 64, values) 99 bldr.AppendValues(values, nil) 100 return bldr.NewArray() 101 case arrow.UINT8: 102 bldr := array.NewUint8Builder(mem) 103 defer bldr.Release() 104 values := make([]uint8, size) 105 FillRandomUint8(0, 0, 64, values) 106 bldr.AppendValues(values, nil) 107 return bldr.NewArray() 108 case arrow.DATE32: 109 bldr := array.NewDate32Builder(mem) 110 defer bldr.Release() 111 values := make([]int32, size) 112 FillRandomInt32Max(0, 24, values) 113 114 dates := make([]arrow.Date32, size) 115 for idx, val := range values { 116 dates[idx] = arrow.Date32(val) * 86400000 117 } 118 bldr.AppendValues(dates, nil) 119 return bldr.NewArray() 120 case arrow.DATE64: 121 bldr := array.NewDate64Builder(mem) 122 defer bldr.Release() 123 values := make([]int64, size) 124 FillRandomInt64Max(0, 24, values) 125 126 dates := make([]arrow.Date64, size) 127 for idx, val := range values { 128 dates[idx] = arrow.Date64(val) * 86400000 129 } 130 bldr.AppendValues(dates, nil) 131 return bldr.NewArray() 132 case arrow.STRING: 133 bldr := array.NewStringBuilder(mem) 134 defer bldr.Release() 135 for i := 0; i < size; i++ { 136 bldr.Append("test-string") 137 } 138 return bldr.NewArray() 139 case arrow.LARGE_STRING: 140 bldr := array.NewLargeStringBuilder(mem) 141 defer bldr.Release() 142 for i := 0; i < size; i++ { 143 bldr.Append("test-large-string") 144 } 145 return bldr.NewArray() 146 case arrow.BINARY, arrow.LARGE_BINARY: 147 bldr := array.NewBinaryBuilder(mem, dt.(arrow.BinaryDataType)) 148 defer bldr.Release() 149 150 buf := make([]byte, 12) 151 r := rand.New(rand.NewSource(0)) 152 for i := 0; i < size; i++ { 153 length := r.Intn(12-2+1) + 2 154 r.Read(buf[:length]) 155 bldr.Append(buf[:length]) 156 } 157 return bldr.NewArray() 158 case arrow.FIXED_SIZE_BINARY: 159 bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 160 defer bldr.Release() 161 162 buf := make([]byte, 10) 163 r := rand.New(rand.NewSource(0)) 164 for i := 0; i < size; i++ { 165 r.Read(buf) 166 bldr.Append(buf) 167 } 168 return bldr.NewArray() 169 case arrow.DECIMAL: 170 dectype := dt.(*arrow.Decimal128Type) 171 bldr := array.NewDecimal128Builder(mem, dectype) 172 defer bldr.Release() 173 174 data := RandomDecimals(int64(size), 0, dectype.Precision) 175 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil) 176 return bldr.NewArray() 177 case arrow.BOOL: 178 bldr := array.NewBooleanBuilder(mem) 179 defer bldr.Release() 180 181 values := make([]bool, size) 182 FillRandomBooleans(0.5, 0, values) 183 bldr.AppendValues(values, nil) 184 return bldr.NewArray() 185 } 186 return nil 187 } 188 189 // RandomNullable generates a random arrow array of length size with approximately numNulls, 190 // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules 191 // as described in the docs for RandomNonNull. 192 func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { 193 switch dt.ID() { 194 case arrow.FLOAT32: 195 bldr := array.NewFloat32Builder(memory.DefaultAllocator) 196 defer bldr.Release() 197 values := make([]float32, size) 198 FillRandomFloat32(0, values) 199 200 valid := make([]bool, size) 201 for idx := range valid { 202 valid[idx] = true 203 } 204 for i := 0; i < numNulls; i++ { 205 valid[i*2] = false 206 } 207 bldr.AppendValues(values, valid) 208 return bldr.NewArray() 209 case arrow.FLOAT64: 210 bldr := array.NewFloat64Builder(memory.DefaultAllocator) 211 defer bldr.Release() 212 values := make([]float64, size) 213 FillRandomFloat64(0, values) 214 215 valid := make([]bool, size) 216 for idx := range valid { 217 valid[idx] = true 218 } 219 for i := 0; i < numNulls; i++ { 220 valid[i*2] = false 221 } 222 bldr.AppendValues(values, valid) 223 return bldr.NewArray() 224 case arrow.INT8: 225 bldr := array.NewInt8Builder(memory.DefaultAllocator) 226 defer bldr.Release() 227 values := make([]int8, size) 228 FillRandomInt8(0, 0, 64, values) 229 valid := make([]bool, size) 230 for idx := range valid { 231 valid[idx] = true 232 } 233 for i := 0; i < numNulls; i++ { 234 valid[i*2] = false 235 } 236 237 bldr.AppendValues(values, valid) 238 return bldr.NewArray() 239 case arrow.UINT8: 240 bldr := array.NewUint8Builder(memory.DefaultAllocator) 241 defer bldr.Release() 242 values := make([]uint8, size) 243 FillRandomUint8(0, 0, 64, values) 244 valid := make([]bool, size) 245 for idx := range valid { 246 valid[idx] = true 247 } 248 for i := 0; i < numNulls; i++ { 249 valid[i*2] = false 250 } 251 252 bldr.AppendValues(values, valid) 253 return bldr.NewArray() 254 case arrow.INT16: 255 bldr := array.NewInt16Builder(memory.DefaultAllocator) 256 defer bldr.Release() 257 values := make([]int16, size) 258 FillRandomInt16(0, 0, 64, values) 259 valid := make([]bool, size) 260 for idx := range valid { 261 valid[idx] = true 262 } 263 for i := 0; i < numNulls; i++ { 264 valid[i*2] = false 265 } 266 267 bldr.AppendValues(values, valid) 268 return bldr.NewArray() 269 case arrow.UINT16: 270 bldr := array.NewUint16Builder(memory.DefaultAllocator) 271 defer bldr.Release() 272 values := make([]uint16, size) 273 FillRandomUint16(0, 0, 64, values) 274 valid := make([]bool, size) 275 for idx := range valid { 276 valid[idx] = true 277 } 278 for i := 0; i < numNulls; i++ { 279 valid[i*2] = false 280 } 281 282 bldr.AppendValues(values, valid) 283 return bldr.NewArray() 284 case arrow.INT32: 285 bldr := array.NewInt32Builder(memory.DefaultAllocator) 286 defer bldr.Release() 287 values := make([]int32, size) 288 FillRandomInt32Max(0, 64, values) 289 valid := make([]bool, size) 290 for idx := range valid { 291 valid[idx] = true 292 } 293 for i := 0; i < numNulls; i++ { 294 valid[i*2] = false 295 } 296 297 bldr.AppendValues(values, valid) 298 return bldr.NewArray() 299 case arrow.UINT32: 300 bldr := array.NewUint32Builder(memory.DefaultAllocator) 301 defer bldr.Release() 302 values := make([]uint32, size) 303 FillRandomUint32Max(0, 64, values) 304 valid := make([]bool, size) 305 for idx := range valid { 306 valid[idx] = true 307 } 308 for i := 0; i < numNulls; i++ { 309 valid[i*2] = false 310 } 311 312 bldr.AppendValues(values, valid) 313 return bldr.NewArray() 314 315 case arrow.INT64: 316 bldr := array.NewInt64Builder(memory.DefaultAllocator) 317 defer bldr.Release() 318 values := make([]int64, size) 319 FillRandomInt64Max(0, 64, values) 320 valid := make([]bool, size) 321 for idx := range valid { 322 valid[idx] = true 323 } 324 for i := 0; i < numNulls; i++ { 325 valid[i*2] = false 326 } 327 328 bldr.AppendValues(values, valid) 329 return bldr.NewArray() 330 case arrow.UINT64: 331 bldr := array.NewUint64Builder(memory.DefaultAllocator) 332 defer bldr.Release() 333 values := make([]uint64, size) 334 FillRandomUint64Max(0, 64, values) 335 valid := make([]bool, size) 336 for idx := range valid { 337 valid[idx] = true 338 } 339 for i := 0; i < numNulls; i++ { 340 valid[i*2] = false 341 } 342 343 bldr.AppendValues(values, valid) 344 return bldr.NewArray() 345 case arrow.DATE32: 346 bldr := array.NewDate32Builder(memory.DefaultAllocator) 347 defer bldr.Release() 348 values := make([]int32, size) 349 FillRandomInt32Max(0, 24, values) 350 351 dates := make([]arrow.Date32, size) 352 for idx, val := range values { 353 dates[idx] = arrow.Date32(val) * 86400000 354 } 355 valid := make([]bool, size) 356 for idx := range valid { 357 valid[idx] = true 358 } 359 for i := 0; i < numNulls; i++ { 360 valid[i*2] = false 361 } 362 bldr.AppendValues(dates, valid) 363 return bldr.NewArray() 364 case arrow.DATE64: 365 bldr := array.NewDate64Builder(memory.DefaultAllocator) 366 defer bldr.Release() 367 values := make([]int64, size) 368 FillRandomInt64Max(0, 24, values) 369 370 dates := make([]arrow.Date64, size) 371 for idx, val := range values { 372 dates[idx] = arrow.Date64(val) * 86400000 373 } 374 valid := make([]bool, size) 375 for idx := range valid { 376 valid[idx] = true 377 } 378 for i := 0; i < numNulls; i++ { 379 valid[i*2] = false 380 } 381 bldr.AppendValues(dates, valid) 382 return bldr.NewArray() 383 case arrow.BINARY: 384 bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) 385 defer bldr.Release() 386 387 valid := make([]bool, size) 388 for idx := range valid { 389 valid[idx] = true 390 } 391 for i := 0; i < numNulls; i++ { 392 valid[i*2] = false 393 } 394 395 buf := make([]byte, 12) 396 r := rand.New(rand.NewSource(0)) 397 for i := 0; i < size; i++ { 398 if !valid[i] { 399 bldr.AppendNull() 400 continue 401 } 402 403 length := r.Intn(12-2+1) + 2 404 r.Read(buf[:length]) 405 bldr.Append(buf[:length]) 406 } 407 return bldr.NewArray() 408 case arrow.STRING: 409 bldr := array.NewStringBuilder(memory.DefaultAllocator) 410 defer bldr.Release() 411 412 valid := make([]bool, size) 413 for idx := range valid { 414 valid[idx] = true 415 } 416 for i := 0; i < numNulls; i++ { 417 valid[i*2] = false 418 } 419 420 buf := make([]byte, 12) 421 r := rand.New(rand.NewSource(0)) 422 for i := 0; i < size; i++ { 423 if !valid[i] { 424 bldr.AppendNull() 425 continue 426 } 427 428 length := r.Intn(12-2+1) + 2 429 r.Read(buf[:length]) 430 // trivially force data to be valid UTF8 by making it all ASCII 431 for idx := range buf[:length] { 432 buf[idx] &= 0x7f 433 } 434 bldr.Append(string(buf[:length])) 435 } 436 return bldr.NewArray() 437 case arrow.FIXED_SIZE_BINARY: 438 bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 439 defer bldr.Release() 440 441 valid := make([]bool, size) 442 for idx := range valid { 443 valid[idx] = true 444 } 445 for i := 0; i < numNulls; i++ { 446 valid[i*2] = false 447 } 448 449 buf := make([]byte, 10) 450 r := rand.New(rand.NewSource(0)) 451 for i := 0; i < size; i++ { 452 if !valid[i] { 453 bldr.AppendNull() 454 continue 455 } 456 457 r.Read(buf) 458 bldr.Append(buf) 459 } 460 return bldr.NewArray() 461 case arrow.DECIMAL: 462 dectype := dt.(*arrow.Decimal128Type) 463 bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype) 464 defer bldr.Release() 465 466 valid := make([]bool, size) 467 for idx := range valid { 468 valid[idx] = true 469 } 470 for i := 0; i < numNulls; i++ { 471 valid[i*2] = false 472 } 473 474 data := RandomDecimals(int64(size), 0, dectype.Precision) 475 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid) 476 return bldr.NewArray() 477 case arrow.BOOL: 478 bldr := array.NewBooleanBuilder(memory.DefaultAllocator) 479 defer bldr.Release() 480 481 valid := make([]bool, size) 482 for idx := range valid { 483 valid[idx] = true 484 } 485 for i := 0; i < numNulls; i++ { 486 valid[i*2] = false 487 } 488 489 values := make([]bool, size) 490 FillRandomBooleans(0.5, 0, values) 491 bldr.AppendValues(values, valid) 492 return bldr.NewArray() 493 } 494 return nil 495 }