github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/testutils/random_arrow.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package testutils 18 19 import ( 20 "github.com/apache/arrow/go/v7/arrow" 21 "github.com/apache/arrow/go/v7/arrow/array" 22 "github.com/apache/arrow/go/v7/arrow/memory" 23 "golang.org/x/exp/rand" 24 ) 25 26 // RandomNonNull generates a random arrow array of the requested type with length size with no nulls. 27 // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal. 28 // 29 // Always uses 0 as the seed with the following min/max restrictions: 30 // int16, uint16, int8, and uint8 will be min 0, max 64 31 // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000 32 // String will all have the value "test-string" 33 // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii 34 // fixed size binary will all be of length 10, random bytes are not limited to ascii 35 // bool will be approximately half false and half true randomly. 36 func RandomNonNull(dt arrow.DataType, size int) arrow.Array { 37 switch dt.ID() { 38 case arrow.FLOAT32: 39 bldr := array.NewFloat32Builder(memory.DefaultAllocator) 40 defer bldr.Release() 41 values := make([]float32, size) 42 FillRandomFloat32(0, values) 43 bldr.AppendValues(values, nil) 44 return bldr.NewArray() 45 case arrow.FLOAT64: 46 bldr := array.NewFloat64Builder(memory.DefaultAllocator) 47 defer bldr.Release() 48 values := make([]float64, size) 49 FillRandomFloat64(0, values) 50 bldr.AppendValues(values, nil) 51 return bldr.NewArray() 52 case arrow.INT64: 53 bldr := array.NewInt64Builder(memory.DefaultAllocator) 54 defer bldr.Release() 55 values := make([]int64, size) 56 FillRandomInt64(0, values) 57 bldr.AppendValues(values, nil) 58 return bldr.NewArray() 59 case arrow.UINT64: 60 bldr := array.NewUint64Builder(memory.DefaultAllocator) 61 defer bldr.Release() 62 values := make([]uint64, size) 63 FillRandomUint64(0, values) 64 bldr.AppendValues(values, nil) 65 return bldr.NewArray() 66 case arrow.INT32: 67 bldr := array.NewInt32Builder(memory.DefaultAllocator) 68 defer bldr.Release() 69 values := make([]int32, size) 70 FillRandomInt32(0, values) 71 bldr.AppendValues(values, nil) 72 return bldr.NewArray() 73 case arrow.UINT32: 74 bldr := array.NewUint32Builder(memory.DefaultAllocator) 75 defer bldr.Release() 76 values := make([]uint32, size) 77 FillRandomUint32(0, values) 78 bldr.AppendValues(values, nil) 79 return bldr.NewArray() 80 case arrow.INT16: 81 bldr := array.NewInt16Builder(memory.DefaultAllocator) 82 defer bldr.Release() 83 values := make([]int16, size) 84 FillRandomInt16(0, 0, 64, values) 85 bldr.AppendValues(values, nil) 86 return bldr.NewArray() 87 case arrow.UINT16: 88 bldr := array.NewUint16Builder(memory.DefaultAllocator) 89 defer bldr.Release() 90 values := make([]uint16, size) 91 FillRandomUint16(0, 0, 64, values) 92 bldr.AppendValues(values, nil) 93 return bldr.NewArray() 94 case arrow.INT8: 95 bldr := array.NewInt8Builder(memory.DefaultAllocator) 96 defer bldr.Release() 97 values := make([]int8, size) 98 FillRandomInt8(0, 0, 64, values) 99 bldr.AppendValues(values, nil) 100 return bldr.NewArray() 101 case arrow.UINT8: 102 bldr := array.NewUint8Builder(memory.DefaultAllocator) 103 defer bldr.Release() 104 values := make([]uint8, size) 105 FillRandomUint8(0, 0, 64, values) 106 bldr.AppendValues(values, nil) 107 return bldr.NewArray() 108 case arrow.DATE32: 109 bldr := array.NewDate32Builder(memory.DefaultAllocator) 110 defer bldr.Release() 111 values := make([]int32, size) 112 FillRandomInt32Max(0, 24, values) 113 114 dates := make([]arrow.Date32, size) 115 for idx, val := range values { 116 dates[idx] = arrow.Date32(val) * 86400000 117 } 118 bldr.AppendValues(dates, nil) 119 return bldr.NewArray() 120 case arrow.DATE64: 121 bldr := array.NewDate64Builder(memory.DefaultAllocator) 122 defer bldr.Release() 123 values := make([]int64, size) 124 FillRandomInt64Max(0, 24, values) 125 126 dates := make([]arrow.Date64, size) 127 for idx, val := range values { 128 dates[idx] = arrow.Date64(val) * 86400000 129 } 130 bldr.AppendValues(dates, nil) 131 return bldr.NewArray() 132 case arrow.STRING: 133 bldr := array.NewStringBuilder(memory.DefaultAllocator) 134 defer bldr.Release() 135 for i := 0; i < size; i++ { 136 bldr.Append("test-string") 137 } 138 return bldr.NewArray() 139 case arrow.BINARY: 140 bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) 141 defer bldr.Release() 142 143 buf := make([]byte, 12) 144 r := rand.New(rand.NewSource(0)) 145 for i := 0; i < size; i++ { 146 length := r.Intn(12-2+1) + 2 147 r.Read(buf[:length]) 148 bldr.Append(buf[:length]) 149 } 150 return bldr.NewArray() 151 case arrow.FIXED_SIZE_BINARY: 152 bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 153 defer bldr.Release() 154 155 buf := make([]byte, 10) 156 r := rand.New(rand.NewSource(0)) 157 for i := 0; i < size; i++ { 158 r.Read(buf) 159 bldr.Append(buf) 160 } 161 return bldr.NewArray() 162 case arrow.DECIMAL: 163 dectype := dt.(*arrow.Decimal128Type) 164 bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype) 165 defer bldr.Release() 166 167 data := RandomDecimals(int64(size), 0, dectype.Precision) 168 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil) 169 return bldr.NewArray() 170 case arrow.BOOL: 171 bldr := array.NewBooleanBuilder(memory.DefaultAllocator) 172 defer bldr.Release() 173 174 values := make([]bool, size) 175 FillRandomBooleans(0.5, 0, values) 176 bldr.AppendValues(values, nil) 177 return bldr.NewArray() 178 } 179 return nil 180 } 181 182 // RandomNullable generates a random arrow array of length size with approximately numNulls, 183 // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules 184 // as described in the docs for RandomNonNull. 185 func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { 186 switch dt.ID() { 187 case arrow.FLOAT32: 188 bldr := array.NewFloat32Builder(memory.DefaultAllocator) 189 defer bldr.Release() 190 values := make([]float32, size) 191 FillRandomFloat32(0, values) 192 193 valid := make([]bool, size) 194 for idx := range valid { 195 valid[idx] = true 196 } 197 for i := 0; i < numNulls; i++ { 198 valid[i*2] = false 199 } 200 bldr.AppendValues(values, valid) 201 return bldr.NewArray() 202 case arrow.FLOAT64: 203 bldr := array.NewFloat64Builder(memory.DefaultAllocator) 204 defer bldr.Release() 205 values := make([]float64, size) 206 FillRandomFloat64(0, values) 207 208 valid := make([]bool, size) 209 for idx := range valid { 210 valid[idx] = true 211 } 212 for i := 0; i < numNulls; i++ { 213 valid[i*2] = false 214 } 215 bldr.AppendValues(values, valid) 216 return bldr.NewArray() 217 case arrow.INT8: 218 bldr := array.NewInt8Builder(memory.DefaultAllocator) 219 defer bldr.Release() 220 values := make([]int8, size) 221 FillRandomInt8(0, 0, 64, values) 222 valid := make([]bool, size) 223 for idx := range valid { 224 valid[idx] = true 225 } 226 for i := 0; i < numNulls; i++ { 227 valid[i*2] = false 228 } 229 230 bldr.AppendValues(values, valid) 231 return bldr.NewArray() 232 case arrow.UINT8: 233 bldr := array.NewUint8Builder(memory.DefaultAllocator) 234 defer bldr.Release() 235 values := make([]uint8, size) 236 FillRandomUint8(0, 0, 64, values) 237 valid := make([]bool, size) 238 for idx := range valid { 239 valid[idx] = true 240 } 241 for i := 0; i < numNulls; i++ { 242 valid[i*2] = false 243 } 244 245 bldr.AppendValues(values, valid) 246 return bldr.NewArray() 247 case arrow.INT16: 248 bldr := array.NewInt16Builder(memory.DefaultAllocator) 249 defer bldr.Release() 250 values := make([]int16, size) 251 FillRandomInt16(0, 0, 64, values) 252 valid := make([]bool, size) 253 for idx := range valid { 254 valid[idx] = true 255 } 256 for i := 0; i < numNulls; i++ { 257 valid[i*2] = false 258 } 259 260 bldr.AppendValues(values, valid) 261 return bldr.NewArray() 262 case arrow.UINT16: 263 bldr := array.NewUint16Builder(memory.DefaultAllocator) 264 defer bldr.Release() 265 values := make([]uint16, size) 266 FillRandomUint16(0, 0, 64, values) 267 valid := make([]bool, size) 268 for idx := range valid { 269 valid[idx] = true 270 } 271 for i := 0; i < numNulls; i++ { 272 valid[i*2] = false 273 } 274 275 bldr.AppendValues(values, valid) 276 return bldr.NewArray() 277 case arrow.INT32: 278 bldr := array.NewInt32Builder(memory.DefaultAllocator) 279 defer bldr.Release() 280 values := make([]int32, size) 281 FillRandomInt32Max(0, 64, values) 282 valid := make([]bool, size) 283 for idx := range valid { 284 valid[idx] = true 285 } 286 for i := 0; i < numNulls; i++ { 287 valid[i*2] = false 288 } 289 290 bldr.AppendValues(values, valid) 291 return bldr.NewArray() 292 case arrow.UINT32: 293 bldr := array.NewUint32Builder(memory.DefaultAllocator) 294 defer bldr.Release() 295 values := make([]uint32, size) 296 FillRandomUint32Max(0, 64, values) 297 valid := make([]bool, size) 298 for idx := range valid { 299 valid[idx] = true 300 } 301 for i := 0; i < numNulls; i++ { 302 valid[i*2] = false 303 } 304 305 bldr.AppendValues(values, valid) 306 return bldr.NewArray() 307 308 case arrow.INT64: 309 bldr := array.NewInt64Builder(memory.DefaultAllocator) 310 defer bldr.Release() 311 values := make([]int64, size) 312 FillRandomInt64Max(0, 64, values) 313 valid := make([]bool, size) 314 for idx := range valid { 315 valid[idx] = true 316 } 317 for i := 0; i < numNulls; i++ { 318 valid[i*2] = false 319 } 320 321 bldr.AppendValues(values, valid) 322 return bldr.NewArray() 323 case arrow.UINT64: 324 bldr := array.NewUint64Builder(memory.DefaultAllocator) 325 defer bldr.Release() 326 values := make([]uint64, size) 327 FillRandomUint64Max(0, 64, values) 328 valid := make([]bool, size) 329 for idx := range valid { 330 valid[idx] = true 331 } 332 for i := 0; i < numNulls; i++ { 333 valid[i*2] = false 334 } 335 336 bldr.AppendValues(values, valid) 337 return bldr.NewArray() 338 case arrow.DATE32: 339 bldr := array.NewDate32Builder(memory.DefaultAllocator) 340 defer bldr.Release() 341 values := make([]int32, size) 342 FillRandomInt32Max(0, 24, values) 343 344 dates := make([]arrow.Date32, size) 345 for idx, val := range values { 346 dates[idx] = arrow.Date32(val) * 86400000 347 } 348 valid := make([]bool, size) 349 for idx := range valid { 350 valid[idx] = true 351 } 352 for i := 0; i < numNulls; i++ { 353 valid[i*2] = false 354 } 355 bldr.AppendValues(dates, valid) 356 return bldr.NewArray() 357 case arrow.DATE64: 358 bldr := array.NewDate64Builder(memory.DefaultAllocator) 359 defer bldr.Release() 360 values := make([]int64, size) 361 FillRandomInt64Max(0, 24, values) 362 363 dates := make([]arrow.Date64, size) 364 for idx, val := range values { 365 dates[idx] = arrow.Date64(val) * 86400000 366 } 367 valid := make([]bool, size) 368 for idx := range valid { 369 valid[idx] = true 370 } 371 for i := 0; i < numNulls; i++ { 372 valid[i*2] = false 373 } 374 bldr.AppendValues(dates, valid) 375 return bldr.NewArray() 376 case arrow.BINARY: 377 bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) 378 defer bldr.Release() 379 380 valid := make([]bool, size) 381 for idx := range valid { 382 valid[idx] = true 383 } 384 for i := 0; i < numNulls; i++ { 385 valid[i*2] = false 386 } 387 388 buf := make([]byte, 12) 389 r := rand.New(rand.NewSource(0)) 390 for i := 0; i < size; i++ { 391 if !valid[i] { 392 bldr.AppendNull() 393 continue 394 } 395 396 length := r.Intn(12-2+1) + 2 397 r.Read(buf[:length]) 398 bldr.Append(buf[:length]) 399 } 400 return bldr.NewArray() 401 case arrow.STRING: 402 bldr := array.NewStringBuilder(memory.DefaultAllocator) 403 defer bldr.Release() 404 405 valid := make([]bool, size) 406 for idx := range valid { 407 valid[idx] = true 408 } 409 for i := 0; i < numNulls; i++ { 410 valid[i*2] = false 411 } 412 413 buf := make([]byte, 12) 414 r := rand.New(rand.NewSource(0)) 415 for i := 0; i < size; i++ { 416 if !valid[i] { 417 bldr.AppendNull() 418 continue 419 } 420 421 length := r.Intn(12-2+1) + 2 422 r.Read(buf[:length]) 423 // trivially force data to be valid UTF8 by making it all ASCII 424 for idx := range buf[:length] { 425 buf[idx] &= 0x7f 426 } 427 bldr.Append(string(buf[:length])) 428 } 429 return bldr.NewArray() 430 case arrow.FIXED_SIZE_BINARY: 431 bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10}) 432 defer bldr.Release() 433 434 valid := make([]bool, size) 435 for idx := range valid { 436 valid[idx] = true 437 } 438 for i := 0; i < numNulls; i++ { 439 valid[i*2] = false 440 } 441 442 buf := make([]byte, 10) 443 r := rand.New(rand.NewSource(0)) 444 for i := 0; i < size; i++ { 445 if !valid[i] { 446 bldr.AppendNull() 447 continue 448 } 449 450 r.Read(buf) 451 bldr.Append(buf) 452 } 453 return bldr.NewArray() 454 case arrow.DECIMAL: 455 dectype := dt.(*arrow.Decimal128Type) 456 bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype) 457 defer bldr.Release() 458 459 valid := make([]bool, size) 460 for idx := range valid { 461 valid[idx] = true 462 } 463 for i := 0; i < numNulls; i++ { 464 valid[i*2] = false 465 } 466 467 data := RandomDecimals(int64(size), 0, dectype.Precision) 468 bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid) 469 return bldr.NewArray() 470 case arrow.BOOL: 471 bldr := array.NewBooleanBuilder(memory.DefaultAllocator) 472 defer bldr.Release() 473 474 valid := make([]bool, size) 475 for idx := range valid { 476 valid[idx] = true 477 } 478 for i := 0; i < numNulls; i++ { 479 valid[i*2] = false 480 } 481 482 values := make([]bool, size) 483 FillRandomBooleans(0.5, 0, values) 484 bldr.AppendValues(values, valid) 485 return bldr.NewArray() 486 } 487 return nil 488 }