github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/testutils/random_arrow.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package testutils
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v14/arrow"
    21  	"github.com/apache/arrow/go/v14/arrow/array"
    22  	"github.com/apache/arrow/go/v14/arrow/memory"
    23  	"golang.org/x/exp/rand"
    24  )
    25  
    26  // RandomNonNull generates a random arrow array of the requested type with length size with no nulls.
    27  // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal.
    28  //
    29  // Always uses 0 as the seed with the following min/max restrictions:
    30  // int16, uint16, int8, and uint8 will be min 0, max 64
    31  // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000
    32  // String will all have the value "test-string"
    33  // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii
    34  // fixed size binary will all be of length 10, random bytes are not limited to ascii
    35  // bool will be approximately half false and half true randomly.
    36  func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Array {
    37  	switch dt.ID() {
    38  	case arrow.FLOAT32:
    39  		bldr := array.NewFloat32Builder(mem)
    40  		defer bldr.Release()
    41  		values := make([]float32, size)
    42  		FillRandomFloat32(0, values)
    43  		bldr.AppendValues(values, nil)
    44  		return bldr.NewArray()
    45  	case arrow.FLOAT64:
    46  		bldr := array.NewFloat64Builder(mem)
    47  		defer bldr.Release()
    48  		values := make([]float64, size)
    49  		FillRandomFloat64(0, values)
    50  		bldr.AppendValues(values, nil)
    51  		return bldr.NewArray()
    52  	case arrow.INT64:
    53  		bldr := array.NewInt64Builder(mem)
    54  		defer bldr.Release()
    55  		values := make([]int64, size)
    56  		FillRandomInt64(0, values)
    57  		bldr.AppendValues(values, nil)
    58  		return bldr.NewArray()
    59  	case arrow.UINT64:
    60  		bldr := array.NewUint64Builder(mem)
    61  		defer bldr.Release()
    62  		values := make([]uint64, size)
    63  		FillRandomUint64(0, values)
    64  		bldr.AppendValues(values, nil)
    65  		return bldr.NewArray()
    66  	case arrow.INT32:
    67  		bldr := array.NewInt32Builder(mem)
    68  		defer bldr.Release()
    69  		values := make([]int32, size)
    70  		FillRandomInt32(0, values)
    71  		bldr.AppendValues(values, nil)
    72  		return bldr.NewArray()
    73  	case arrow.UINT32:
    74  		bldr := array.NewUint32Builder(mem)
    75  		defer bldr.Release()
    76  		values := make([]uint32, size)
    77  		FillRandomUint32(0, values)
    78  		bldr.AppendValues(values, nil)
    79  		return bldr.NewArray()
    80  	case arrow.INT16:
    81  		bldr := array.NewInt16Builder(mem)
    82  		defer bldr.Release()
    83  		values := make([]int16, size)
    84  		FillRandomInt16(0, 0, 64, values)
    85  		bldr.AppendValues(values, nil)
    86  		return bldr.NewArray()
    87  	case arrow.UINT16:
    88  		bldr := array.NewUint16Builder(mem)
    89  		defer bldr.Release()
    90  		values := make([]uint16, size)
    91  		FillRandomUint16(0, 0, 64, values)
    92  		bldr.AppendValues(values, nil)
    93  		return bldr.NewArray()
    94  	case arrow.INT8:
    95  		bldr := array.NewInt8Builder(mem)
    96  		defer bldr.Release()
    97  		values := make([]int8, size)
    98  		FillRandomInt8(0, 0, 64, values)
    99  		bldr.AppendValues(values, nil)
   100  		return bldr.NewArray()
   101  	case arrow.UINT8:
   102  		bldr := array.NewUint8Builder(mem)
   103  		defer bldr.Release()
   104  		values := make([]uint8, size)
   105  		FillRandomUint8(0, 0, 64, values)
   106  		bldr.AppendValues(values, nil)
   107  		return bldr.NewArray()
   108  	case arrow.DATE32:
   109  		bldr := array.NewDate32Builder(mem)
   110  		defer bldr.Release()
   111  		values := make([]int32, size)
   112  		FillRandomInt32Max(0, 24, values)
   113  
   114  		dates := make([]arrow.Date32, size)
   115  		for idx, val := range values {
   116  			dates[idx] = arrow.Date32(val) * 86400000
   117  		}
   118  		bldr.AppendValues(dates, nil)
   119  		return bldr.NewArray()
   120  	case arrow.DATE64:
   121  		bldr := array.NewDate64Builder(mem)
   122  		defer bldr.Release()
   123  		values := make([]int64, size)
   124  		FillRandomInt64Max(0, 24, values)
   125  
   126  		dates := make([]arrow.Date64, size)
   127  		for idx, val := range values {
   128  			dates[idx] = arrow.Date64(val) * 86400000
   129  		}
   130  		bldr.AppendValues(dates, nil)
   131  		return bldr.NewArray()
   132  	case arrow.STRING:
   133  		bldr := array.NewStringBuilder(mem)
   134  		defer bldr.Release()
   135  		for i := 0; i < size; i++ {
   136  			bldr.Append("test-string")
   137  		}
   138  		return bldr.NewArray()
   139  	case arrow.LARGE_STRING:
   140  		bldr := array.NewLargeStringBuilder(mem)
   141  		defer bldr.Release()
   142  		for i := 0; i < size; i++ {
   143  			bldr.Append("test-large-string")
   144  		}
   145  		return bldr.NewArray()
   146  	case arrow.BINARY, arrow.LARGE_BINARY:
   147  		bldr := array.NewBinaryBuilder(mem, dt.(arrow.BinaryDataType))
   148  		defer bldr.Release()
   149  
   150  		buf := make([]byte, 12)
   151  		r := rand.New(rand.NewSource(0))
   152  		for i := 0; i < size; i++ {
   153  			length := r.Intn(12-2+1) + 2
   154  			r.Read(buf[:length])
   155  			bldr.Append(buf[:length])
   156  		}
   157  		return bldr.NewArray()
   158  	case arrow.FIXED_SIZE_BINARY:
   159  		bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   160  		defer bldr.Release()
   161  
   162  		buf := make([]byte, 10)
   163  		r := rand.New(rand.NewSource(0))
   164  		for i := 0; i < size; i++ {
   165  			r.Read(buf)
   166  			bldr.Append(buf)
   167  		}
   168  		return bldr.NewArray()
   169  	case arrow.DECIMAL:
   170  		dectype := dt.(*arrow.Decimal128Type)
   171  		bldr := array.NewDecimal128Builder(mem, dectype)
   172  		defer bldr.Release()
   173  
   174  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   175  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil)
   176  		return bldr.NewArray()
   177  	case arrow.BOOL:
   178  		bldr := array.NewBooleanBuilder(mem)
   179  		defer bldr.Release()
   180  
   181  		values := make([]bool, size)
   182  		FillRandomBooleans(0.5, 0, values)
   183  		bldr.AppendValues(values, nil)
   184  		return bldr.NewArray()
   185  	}
   186  	return nil
   187  }
   188  
   189  // RandomNullable generates a random arrow array of length size with approximately numNulls,
   190  // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules
   191  // as described in the docs for RandomNonNull.
   192  func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array {
   193  	switch dt.ID() {
   194  	case arrow.FLOAT32:
   195  		bldr := array.NewFloat32Builder(memory.DefaultAllocator)
   196  		defer bldr.Release()
   197  		values := make([]float32, size)
   198  		FillRandomFloat32(0, values)
   199  
   200  		valid := make([]bool, size)
   201  		for idx := range valid {
   202  			valid[idx] = true
   203  		}
   204  		for i := 0; i < numNulls; i++ {
   205  			valid[i*2] = false
   206  		}
   207  		bldr.AppendValues(values, valid)
   208  		return bldr.NewArray()
   209  	case arrow.FLOAT64:
   210  		bldr := array.NewFloat64Builder(memory.DefaultAllocator)
   211  		defer bldr.Release()
   212  		values := make([]float64, size)
   213  		FillRandomFloat64(0, values)
   214  
   215  		valid := make([]bool, size)
   216  		for idx := range valid {
   217  			valid[idx] = true
   218  		}
   219  		for i := 0; i < numNulls; i++ {
   220  			valid[i*2] = false
   221  		}
   222  		bldr.AppendValues(values, valid)
   223  		return bldr.NewArray()
   224  	case arrow.INT8:
   225  		bldr := array.NewInt8Builder(memory.DefaultAllocator)
   226  		defer bldr.Release()
   227  		values := make([]int8, size)
   228  		FillRandomInt8(0, 0, 64, values)
   229  		valid := make([]bool, size)
   230  		for idx := range valid {
   231  			valid[idx] = true
   232  		}
   233  		for i := 0; i < numNulls; i++ {
   234  			valid[i*2] = false
   235  		}
   236  
   237  		bldr.AppendValues(values, valid)
   238  		return bldr.NewArray()
   239  	case arrow.UINT8:
   240  		bldr := array.NewUint8Builder(memory.DefaultAllocator)
   241  		defer bldr.Release()
   242  		values := make([]uint8, size)
   243  		FillRandomUint8(0, 0, 64, values)
   244  		valid := make([]bool, size)
   245  		for idx := range valid {
   246  			valid[idx] = true
   247  		}
   248  		for i := 0; i < numNulls; i++ {
   249  			valid[i*2] = false
   250  		}
   251  
   252  		bldr.AppendValues(values, valid)
   253  		return bldr.NewArray()
   254  	case arrow.INT16:
   255  		bldr := array.NewInt16Builder(memory.DefaultAllocator)
   256  		defer bldr.Release()
   257  		values := make([]int16, size)
   258  		FillRandomInt16(0, 0, 64, values)
   259  		valid := make([]bool, size)
   260  		for idx := range valid {
   261  			valid[idx] = true
   262  		}
   263  		for i := 0; i < numNulls; i++ {
   264  			valid[i*2] = false
   265  		}
   266  
   267  		bldr.AppendValues(values, valid)
   268  		return bldr.NewArray()
   269  	case arrow.UINT16:
   270  		bldr := array.NewUint16Builder(memory.DefaultAllocator)
   271  		defer bldr.Release()
   272  		values := make([]uint16, size)
   273  		FillRandomUint16(0, 0, 64, values)
   274  		valid := make([]bool, size)
   275  		for idx := range valid {
   276  			valid[idx] = true
   277  		}
   278  		for i := 0; i < numNulls; i++ {
   279  			valid[i*2] = false
   280  		}
   281  
   282  		bldr.AppendValues(values, valid)
   283  		return bldr.NewArray()
   284  	case arrow.INT32:
   285  		bldr := array.NewInt32Builder(memory.DefaultAllocator)
   286  		defer bldr.Release()
   287  		values := make([]int32, size)
   288  		FillRandomInt32Max(0, 64, values)
   289  		valid := make([]bool, size)
   290  		for idx := range valid {
   291  			valid[idx] = true
   292  		}
   293  		for i := 0; i < numNulls; i++ {
   294  			valid[i*2] = false
   295  		}
   296  
   297  		bldr.AppendValues(values, valid)
   298  		return bldr.NewArray()
   299  	case arrow.UINT32:
   300  		bldr := array.NewUint32Builder(memory.DefaultAllocator)
   301  		defer bldr.Release()
   302  		values := make([]uint32, size)
   303  		FillRandomUint32Max(0, 64, values)
   304  		valid := make([]bool, size)
   305  		for idx := range valid {
   306  			valid[idx] = true
   307  		}
   308  		for i := 0; i < numNulls; i++ {
   309  			valid[i*2] = false
   310  		}
   311  
   312  		bldr.AppendValues(values, valid)
   313  		return bldr.NewArray()
   314  
   315  	case arrow.INT64:
   316  		bldr := array.NewInt64Builder(memory.DefaultAllocator)
   317  		defer bldr.Release()
   318  		values := make([]int64, size)
   319  		FillRandomInt64Max(0, 64, values)
   320  		valid := make([]bool, size)
   321  		for idx := range valid {
   322  			valid[idx] = true
   323  		}
   324  		for i := 0; i < numNulls; i++ {
   325  			valid[i*2] = false
   326  		}
   327  
   328  		bldr.AppendValues(values, valid)
   329  		return bldr.NewArray()
   330  	case arrow.UINT64:
   331  		bldr := array.NewUint64Builder(memory.DefaultAllocator)
   332  		defer bldr.Release()
   333  		values := make([]uint64, size)
   334  		FillRandomUint64Max(0, 64, values)
   335  		valid := make([]bool, size)
   336  		for idx := range valid {
   337  			valid[idx] = true
   338  		}
   339  		for i := 0; i < numNulls; i++ {
   340  			valid[i*2] = false
   341  		}
   342  
   343  		bldr.AppendValues(values, valid)
   344  		return bldr.NewArray()
   345  	case arrow.DATE32:
   346  		bldr := array.NewDate32Builder(memory.DefaultAllocator)
   347  		defer bldr.Release()
   348  		values := make([]int32, size)
   349  		FillRandomInt32Max(0, 24, values)
   350  
   351  		dates := make([]arrow.Date32, size)
   352  		for idx, val := range values {
   353  			dates[idx] = arrow.Date32(val) * 86400000
   354  		}
   355  		valid := make([]bool, size)
   356  		for idx := range valid {
   357  			valid[idx] = true
   358  		}
   359  		for i := 0; i < numNulls; i++ {
   360  			valid[i*2] = false
   361  		}
   362  		bldr.AppendValues(dates, valid)
   363  		return bldr.NewArray()
   364  	case arrow.DATE64:
   365  		bldr := array.NewDate64Builder(memory.DefaultAllocator)
   366  		defer bldr.Release()
   367  		values := make([]int64, size)
   368  		FillRandomInt64Max(0, 24, values)
   369  
   370  		dates := make([]arrow.Date64, size)
   371  		for idx, val := range values {
   372  			dates[idx] = arrow.Date64(val) * 86400000
   373  		}
   374  		valid := make([]bool, size)
   375  		for idx := range valid {
   376  			valid[idx] = true
   377  		}
   378  		for i := 0; i < numNulls; i++ {
   379  			valid[i*2] = false
   380  		}
   381  		bldr.AppendValues(dates, valid)
   382  		return bldr.NewArray()
   383  	case arrow.BINARY:
   384  		bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
   385  		defer bldr.Release()
   386  
   387  		valid := make([]bool, size)
   388  		for idx := range valid {
   389  			valid[idx] = true
   390  		}
   391  		for i := 0; i < numNulls; i++ {
   392  			valid[i*2] = false
   393  		}
   394  
   395  		buf := make([]byte, 12)
   396  		r := rand.New(rand.NewSource(0))
   397  		for i := 0; i < size; i++ {
   398  			if !valid[i] {
   399  				bldr.AppendNull()
   400  				continue
   401  			}
   402  
   403  			length := r.Intn(12-2+1) + 2
   404  			r.Read(buf[:length])
   405  			bldr.Append(buf[:length])
   406  		}
   407  		return bldr.NewArray()
   408  	case arrow.STRING:
   409  		bldr := array.NewStringBuilder(memory.DefaultAllocator)
   410  		defer bldr.Release()
   411  
   412  		valid := make([]bool, size)
   413  		for idx := range valid {
   414  			valid[idx] = true
   415  		}
   416  		for i := 0; i < numNulls; i++ {
   417  			valid[i*2] = false
   418  		}
   419  
   420  		buf := make([]byte, 12)
   421  		r := rand.New(rand.NewSource(0))
   422  		for i := 0; i < size; i++ {
   423  			if !valid[i] {
   424  				bldr.AppendNull()
   425  				continue
   426  			}
   427  
   428  			length := r.Intn(12-2+1) + 2
   429  			r.Read(buf[:length])
   430  			// trivially force data to be valid UTF8 by making it all ASCII
   431  			for idx := range buf[:length] {
   432  				buf[idx] &= 0x7f
   433  			}
   434  			bldr.Append(string(buf[:length]))
   435  		}
   436  		return bldr.NewArray()
   437  	case arrow.FIXED_SIZE_BINARY:
   438  		bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   439  		defer bldr.Release()
   440  
   441  		valid := make([]bool, size)
   442  		for idx := range valid {
   443  			valid[idx] = true
   444  		}
   445  		for i := 0; i < numNulls; i++ {
   446  			valid[i*2] = false
   447  		}
   448  
   449  		buf := make([]byte, 10)
   450  		r := rand.New(rand.NewSource(0))
   451  		for i := 0; i < size; i++ {
   452  			if !valid[i] {
   453  				bldr.AppendNull()
   454  				continue
   455  			}
   456  
   457  			r.Read(buf)
   458  			bldr.Append(buf)
   459  		}
   460  		return bldr.NewArray()
   461  	case arrow.DECIMAL:
   462  		dectype := dt.(*arrow.Decimal128Type)
   463  		bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype)
   464  		defer bldr.Release()
   465  
   466  		valid := make([]bool, size)
   467  		for idx := range valid {
   468  			valid[idx] = true
   469  		}
   470  		for i := 0; i < numNulls; i++ {
   471  			valid[i*2] = false
   472  		}
   473  
   474  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   475  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid)
   476  		return bldr.NewArray()
   477  	case arrow.BOOL:
   478  		bldr := array.NewBooleanBuilder(memory.DefaultAllocator)
   479  		defer bldr.Release()
   480  
   481  		valid := make([]bool, size)
   482  		for idx := range valid {
   483  			valid[idx] = true
   484  		}
   485  		for i := 0; i < numNulls; i++ {
   486  			valid[i*2] = false
   487  		}
   488  
   489  		values := make([]bool, size)
   490  		FillRandomBooleans(0.5, 0, values)
   491  		bldr.AppendValues(values, valid)
   492  		return bldr.NewArray()
   493  	}
   494  	return nil
   495  }