github.com/apache/arrow/go/v16@v16.1.0/parquet/internal/testutils/random_arrow.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package testutils 18 19 import ( 20 "github.com/apache/arrow/go/v16/arrow" 21 "github.com/apache/arrow/go/v16/arrow/array" 22 "github.com/apache/arrow/go/v16/arrow/float16" 23 "github.com/apache/arrow/go/v16/arrow/memory" 24 "golang.org/x/exp/rand" 25 ) 26 27 // RandomNonNull generates a random arrow array of the requested type with length size with no nulls. 28 // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal. 29 // 30 // Always uses 0 as the seed with the following min/max restrictions: 31 // int16, uint16, int8, and uint8 will be min 0, max 64 32 // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000 33 // String will all have the value "test-string" 34 // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii 35 // fixed size binary will all be of length 10, random bytes are not limited to ascii 36 // bool will be approximately half false and half true randomly. 37 func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Array { 38 switch dt.ID() { 39 case arrow.FLOAT32: 40 bldr := array.NewFloat32Builder(mem) 41 defer bldr.Release() 42 values := make([]float32, size) 43 FillRandomFloat32(0, values) 44 bldr.AppendValues(values, nil) 45 return bldr.NewArray() 46 case arrow.FLOAT64: 47 bldr := array.NewFloat64Builder(mem) 48 defer bldr.Release() 49 values := make([]float64, size) 50 FillRandomFloat64(0, values) 51 bldr.AppendValues(values, nil) 52 return bldr.NewArray() 53 case arrow.FLOAT16: 54 bldr := array.NewFloat16Builder(mem) 55 defer bldr.Release() 56 values := make([]float16.Num, size) 57 FillRandomFloat16(0, values) 58 bldr.AppendValues(values, nil) 59 return bldr.NewArray() 60 case arrow.INT64: 61 bldr := array.NewInt64Builder(mem) 62 defer bldr.Release() 63 values := make([]int64, size) 64 FillRandomInt64(0, values) 65 bldr.AppendValues(values, nil) 66 return bldr.NewArray() 67 case arrow.UINT64: 68 bldr := array.NewUint64Builder(mem) 69 defer bldr.Release() 70 values := make([]uint64, size) 71 FillRandomUint64(0, values) 72 bldr.AppendValues(values, nil) 73 return bldr.NewArray() 74 case arrow.INT32: 75 bldr := array.NewInt32Builder(mem) 76 defer bldr.Release() 77 values := make([]int32, size) 78 FillRandomInt32(0, values) 79 bldr.AppendValues(values, nil) 80 return bldr.NewArray() 81 case arrow.UINT32: 82 bldr := array.NewUint32Builder(mem) 83 defer bldr.Release() 84 values := make([]uint32, size) 85 FillRandomUint32(0, values) 86 bldr.AppendValues(values, nil) 87 return bldr.NewArray() 88 case arrow.INT16: 89 bldr := array.NewInt16Builder(mem) 90 defer bldr.Release() 91 values := make([]int16, size) 92 FillRandomInt16(0, 0, 64, values) 93 bldr.AppendValues(values, nil) 94 return bldr.NewArray() 95 case arrow.UINT16: 96 bldr := array.NewUint16Builder(mem) 97 defer bldr.Release() 98 values := make([]uint16, size) 99 FillRandomUint16(0, 0, 64, values) 100 bldr.AppendValues(values, nil) 101 return bldr.NewArray() 102 case arrow.INT8: 103 bldr := array.NewInt8Builder(mem) 104 defer bldr.Release() 105 values := make([]int8, size) 106 FillRandomInt8(0, 0, 64, values) 107 bldr.AppendValues(values, nil) 108 return bldr.NewArray() 109 case arrow.UINT8: 110 bldr := array.NewUint8Builder(mem) 111 defer bldr.Release() 112 values := make([]uint8, size) 113 FillRandomUint8(0, 0, 64, values) 114 bldr.AppendValues(values, nil) 115 return bldr.NewArray() 116 case arrow.DATE32: 117 bldr := array.NewDate32Builder(mem) 118 defer bldr.Release() 119 values := make([]int32, size) 120 FillRandomInt32Max(0, 24, values) 121 122 dates := make([]arrow.Date32, size) 123 for idx, val := range values { 124 dates[idx] = arrow.Date32(val) * 86400000 125 } 126 bldr.AppendValues(dates, nil) 127 return bldr.NewArray() 128 case arrow.DATE64: 129 bldr := array.NewDate64Builder(mem) 130 defer bldr.Release() 131 values := make([]int64, size) 132 FillRandomInt64Max(0, 24, values) 133 134 dates := make([]arrow.Date64, size) 135 for idx, val := range values { 136 dates[idx] = arrow.Date64(val) * 86400000 137 } 138 bldr.AppendValues(dates, nil) 139 return bldr.NewArray() 140 case arrow.STRING: 141 bldr := array.NewStringBuilder(mem) 142 defer bldr.Release() 143 for i := 0; i < size; i++ { 144 bldr.Append("test-string") 145 } 146 return bldr.NewArray() 147 case arrow.LARGE_STRING: 148 bldr := array.NewLargeStringBuilder(mem) 149 defer bldr.Release() 150 for i := 0; i < size; i++ { 151 bldr.Append("test-large-string") 152 } 153 return bldr.NewArray() 154 case arrow.BINARY, arrow.LARGE_BINARY: 155 bldr := array.NewBinaryBuilder(mem, dt.(arrow.BinaryDataType)) 156 defer bldr.Release() 157 158 buf := make([]byte, 12) 159 r := rand.New(rand.NewSource(0)) 160 for i := 0; i < size; i++ { 161 length := r.Intn(12-2+1) + 2 162 r.Read(buf[:length]) 163 bldr.Append(buf[:length]) 164 } 165 return bldr.NewArray() 166 case arrow.FIXED_SIZE_BINARY: 167 bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 168 defer bldr.Release() 169 170 buf := make([]byte, 10) 171 r := rand.New(rand.NewSource(0)) 172 for i := 0; i < size; i++ { 173 r.Read(buf) 174 bldr.Append(buf) 175 } 176 return bldr.NewArray() 177 case arrow.DECIMAL: 178 dectype := dt.(*arrow.Decimal128Type) 179 bldr := array.NewDecimal128Builder(mem, dectype) 180 defer bldr.Release() 181 182 data := RandomDecimals(int64(size), 0, dectype.Precision) 183 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil) 184 return bldr.NewArray() 185 case arrow.BOOL: 186 bldr := array.NewBooleanBuilder(mem) 187 defer bldr.Release() 188 189 values := make([]bool, size) 190 FillRandomBooleans(0.5, 0, values) 191 bldr.AppendValues(values, nil) 192 return bldr.NewArray() 193 } 194 return nil 195 } 196 197 // RandomNullable generates a random arrow array of length size with approximately numNulls, 198 // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules 199 // as described in the docs for RandomNonNull. 200 func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { 201 switch dt.ID() { 202 case arrow.FLOAT32: 203 bldr := array.NewFloat32Builder(memory.DefaultAllocator) 204 defer bldr.Release() 205 values := make([]float32, size) 206 FillRandomFloat32(0, values) 207 208 valid := make([]bool, size) 209 for idx := range valid { 210 valid[idx] = true 211 } 212 for i := 0; i < numNulls; i++ { 213 valid[i*2] = false 214 } 215 bldr.AppendValues(values, valid) 216 return bldr.NewArray() 217 case arrow.FLOAT64: 218 bldr := array.NewFloat64Builder(memory.DefaultAllocator) 219 defer bldr.Release() 220 values := make([]float64, size) 221 FillRandomFloat64(0, values) 222 223 valid := make([]bool, size) 224 for idx := range valid { 225 valid[idx] = true 226 } 227 for i := 0; i < numNulls; i++ { 228 valid[i*2] = false 229 } 230 bldr.AppendValues(values, valid) 231 return bldr.NewArray() 232 case arrow.FLOAT16: 233 bldr := array.NewFloat16Builder(memory.DefaultAllocator) 234 defer bldr.Release() 235 values := make([]float16.Num, size) 236 FillRandomFloat16(0, values) 237 238 valid := make([]bool, size) 239 for idx := range valid { 240 valid[idx] = true 241 } 242 for i := 0; i < numNulls; i++ { 243 valid[i*2] = false 244 } 245 bldr.AppendValues(values, valid) 246 return bldr.NewArray() 247 case arrow.INT8: 248 bldr := array.NewInt8Builder(memory.DefaultAllocator) 249 defer bldr.Release() 250 values := make([]int8, size) 251 FillRandomInt8(0, 0, 64, values) 252 valid := make([]bool, size) 253 for idx := range valid { 254 valid[idx] = true 255 } 256 for i := 0; i < numNulls; i++ { 257 valid[i*2] = false 258 } 259 260 bldr.AppendValues(values, valid) 261 return bldr.NewArray() 262 case arrow.UINT8: 263 bldr := array.NewUint8Builder(memory.DefaultAllocator) 264 defer bldr.Release() 265 values := make([]uint8, size) 266 FillRandomUint8(0, 0, 64, values) 267 valid := make([]bool, size) 268 for idx := range valid { 269 valid[idx] = true 270 } 271 for i := 0; i < numNulls; i++ { 272 valid[i*2] = false 273 } 274 275 bldr.AppendValues(values, valid) 276 return bldr.NewArray() 277 case arrow.INT16: 278 bldr := array.NewInt16Builder(memory.DefaultAllocator) 279 defer bldr.Release() 280 values := make([]int16, size) 281 FillRandomInt16(0, 0, 64, values) 282 valid := make([]bool, size) 283 for idx := range valid { 284 valid[idx] = true 285 } 286 for i := 0; i < numNulls; i++ { 287 valid[i*2] = false 288 } 289 290 bldr.AppendValues(values, valid) 291 return bldr.NewArray() 292 case arrow.UINT16: 293 bldr := array.NewUint16Builder(memory.DefaultAllocator) 294 defer bldr.Release() 295 values := make([]uint16, size) 296 FillRandomUint16(0, 0, 64, values) 297 valid := make([]bool, size) 298 for idx := range valid { 299 valid[idx] = true 300 } 301 for i := 0; i < numNulls; i++ { 302 valid[i*2] = false 303 } 304 305 bldr.AppendValues(values, valid) 306 return bldr.NewArray() 307 case arrow.INT32: 308 bldr := array.NewInt32Builder(memory.DefaultAllocator) 309 defer bldr.Release() 310 values := make([]int32, size) 311 FillRandomInt32Max(0, 64, values) 312 valid := make([]bool, size) 313 for idx := range valid { 314 valid[idx] = true 315 } 316 for i := 0; i < numNulls; i++ { 317 valid[i*2] = false 318 } 319 320 bldr.AppendValues(values, valid) 321 return bldr.NewArray() 322 case arrow.UINT32: 323 bldr := array.NewUint32Builder(memory.DefaultAllocator) 324 defer bldr.Release() 325 values := make([]uint32, size) 326 FillRandomUint32Max(0, 64, values) 327 valid := make([]bool, size) 328 for idx := range valid { 329 valid[idx] = true 330 } 331 for i := 0; i < numNulls; i++ { 332 valid[i*2] = false 333 } 334 335 bldr.AppendValues(values, valid) 336 return bldr.NewArray() 337 338 case arrow.INT64: 339 bldr := array.NewInt64Builder(memory.DefaultAllocator) 340 defer bldr.Release() 341 values := make([]int64, size) 342 FillRandomInt64Max(0, 64, values) 343 valid := make([]bool, size) 344 for idx := range valid { 345 valid[idx] = true 346 } 347 for i := 0; i < numNulls; i++ { 348 valid[i*2] = false 349 } 350 351 bldr.AppendValues(values, valid) 352 return bldr.NewArray() 353 case arrow.UINT64: 354 bldr := array.NewUint64Builder(memory.DefaultAllocator) 355 defer bldr.Release() 356 values := make([]uint64, size) 357 FillRandomUint64Max(0, 64, values) 358 valid := make([]bool, size) 359 for idx := range valid { 360 valid[idx] = true 361 } 362 for i := 0; i < numNulls; i++ { 363 valid[i*2] = false 364 } 365 366 bldr.AppendValues(values, valid) 367 return bldr.NewArray() 368 case arrow.DATE32: 369 bldr := array.NewDate32Builder(memory.DefaultAllocator) 370 defer bldr.Release() 371 values := make([]int32, size) 372 FillRandomInt32Max(0, 24, values) 373 374 dates := make([]arrow.Date32, size) 375 for idx, val := range values { 376 dates[idx] = arrow.Date32(val) * 86400000 377 } 378 valid := make([]bool, size) 379 for idx := range valid { 380 valid[idx] = true 381 } 382 for i := 0; i < numNulls; i++ { 383 valid[i*2] = false 384 } 385 bldr.AppendValues(dates, valid) 386 return bldr.NewArray() 387 case arrow.DATE64: 388 bldr := array.NewDate64Builder(memory.DefaultAllocator) 389 defer bldr.Release() 390 values := make([]int64, size) 391 FillRandomInt64Max(0, 24, values) 392 393 dates := make([]arrow.Date64, size) 394 for idx, val := range values { 395 dates[idx] = arrow.Date64(val) * 86400000 396 } 397 valid := make([]bool, size) 398 for idx := range valid { 399 valid[idx] = true 400 } 401 for i := 0; i < numNulls; i++ { 402 valid[i*2] = false 403 } 404 bldr.AppendValues(dates, valid) 405 return bldr.NewArray() 406 case arrow.BINARY: 407 bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) 408 defer bldr.Release() 409 410 valid := make([]bool, size) 411 for idx := range valid { 412 valid[idx] = true 413 } 414 for i := 0; i < numNulls; i++ { 415 valid[i*2] = false 416 } 417 418 buf := make([]byte, 12) 419 r := rand.New(rand.NewSource(0)) 420 for i := 0; i < size; i++ { 421 if !valid[i] { 422 bldr.AppendNull() 423 continue 424 } 425 426 length := r.Intn(12-2+1) + 2 427 r.Read(buf[:length]) 428 bldr.Append(buf[:length]) 429 } 430 return bldr.NewArray() 431 case arrow.STRING: 432 bldr := array.NewStringBuilder(memory.DefaultAllocator) 433 defer bldr.Release() 434 435 valid := make([]bool, size) 436 for idx := range valid { 437 valid[idx] = true 438 } 439 for i := 0; i < numNulls; i++ { 440 valid[i*2] = false 441 } 442 443 buf := make([]byte, 12) 444 r := rand.New(rand.NewSource(0)) 445 for i := 0; i < size; i++ { 446 if !valid[i] { 447 bldr.AppendNull() 448 continue 449 } 450 451 length := r.Intn(12-2+1) + 2 452 r.Read(buf[:length]) 453 // trivially force data to be valid UTF8 by making it all ASCII 454 for idx := range buf[:length] { 455 buf[idx] &= 0x7f 456 } 457 bldr.Append(string(buf[:length])) 458 } 459 return bldr.NewArray() 460 case arrow.FIXED_SIZE_BINARY: 461 bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 462 defer bldr.Release() 463 464 valid := make([]bool, size) 465 for idx := range valid { 466 valid[idx] = true 467 } 468 for i := 0; i < numNulls; i++ { 469 valid[i*2] = false 470 } 471 472 buf := make([]byte, 10) 473 r := rand.New(rand.NewSource(0)) 474 for i := 0; i < size; i++ { 475 if !valid[i] { 476 bldr.AppendNull() 477 continue 478 } 479 480 r.Read(buf) 481 bldr.Append(buf) 482 } 483 return bldr.NewArray() 484 case arrow.DECIMAL: 485 dectype := dt.(*arrow.Decimal128Type) 486 bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype) 487 defer bldr.Release() 488 489 valid := make([]bool, size) 490 for idx := range valid { 491 valid[idx] = true 492 } 493 for i := 0; i < numNulls; i++ { 494 valid[i*2] = false 495 } 496 497 data := RandomDecimals(int64(size), 0, dectype.Precision) 498 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid) 499 return bldr.NewArray() 500 case arrow.BOOL: 501 bldr := array.NewBooleanBuilder(memory.DefaultAllocator) 502 defer bldr.Release() 503 504 valid := make([]bool, size) 505 for idx := range valid { 506 valid[idx] = true 507 } 508 for i := 0; i < numNulls; i++ { 509 valid[i*2] = false 510 } 511 512 values := make([]bool, size) 513 FillRandomBooleans(0.5, 0, values) 514 bldr.AppendValues(values, valid) 515 return bldr.NewArray() 516 } 517 return nil 518 }