github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/testutils/random_arrow.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package testutils
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v7/arrow"
    21  	"github.com/apache/arrow/go/v7/arrow/array"
    22  	"github.com/apache/arrow/go/v7/arrow/memory"
    23  	"golang.org/x/exp/rand"
    24  )
    25  
    26  // RandomNonNull generates a random arrow array of the requested type with length size with no nulls.
    27  // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal.
    28  //
    29  // Always uses 0 as the seed with the following min/max restrictions:
    30  // int16, uint16, int8, and uint8 will be min 0, max 64
    31  // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000
    32  // String will all have the value "test-string"
    33  // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii
    34  // fixed size binary will all be of length 10, random bytes are not limited to ascii
    35  // bool will be approximately half false and half true randomly.
    36  func RandomNonNull(dt arrow.DataType, size int) arrow.Array {
    37  	switch dt.ID() {
    38  	case arrow.FLOAT32:
    39  		bldr := array.NewFloat32Builder(memory.DefaultAllocator)
    40  		defer bldr.Release()
    41  		values := make([]float32, size)
    42  		FillRandomFloat32(0, values)
    43  		bldr.AppendValues(values, nil)
    44  		return bldr.NewArray()
    45  	case arrow.FLOAT64:
    46  		bldr := array.NewFloat64Builder(memory.DefaultAllocator)
    47  		defer bldr.Release()
    48  		values := make([]float64, size)
    49  		FillRandomFloat64(0, values)
    50  		bldr.AppendValues(values, nil)
    51  		return bldr.NewArray()
    52  	case arrow.INT64:
    53  		bldr := array.NewInt64Builder(memory.DefaultAllocator)
    54  		defer bldr.Release()
    55  		values := make([]int64, size)
    56  		FillRandomInt64(0, values)
    57  		bldr.AppendValues(values, nil)
    58  		return bldr.NewArray()
    59  	case arrow.UINT64:
    60  		bldr := array.NewUint64Builder(memory.DefaultAllocator)
    61  		defer bldr.Release()
    62  		values := make([]uint64, size)
    63  		FillRandomUint64(0, values)
    64  		bldr.AppendValues(values, nil)
    65  		return bldr.NewArray()
    66  	case arrow.INT32:
    67  		bldr := array.NewInt32Builder(memory.DefaultAllocator)
    68  		defer bldr.Release()
    69  		values := make([]int32, size)
    70  		FillRandomInt32(0, values)
    71  		bldr.AppendValues(values, nil)
    72  		return bldr.NewArray()
    73  	case arrow.UINT32:
    74  		bldr := array.NewUint32Builder(memory.DefaultAllocator)
    75  		defer bldr.Release()
    76  		values := make([]uint32, size)
    77  		FillRandomUint32(0, values)
    78  		bldr.AppendValues(values, nil)
    79  		return bldr.NewArray()
    80  	case arrow.INT16:
    81  		bldr := array.NewInt16Builder(memory.DefaultAllocator)
    82  		defer bldr.Release()
    83  		values := make([]int16, size)
    84  		FillRandomInt16(0, 0, 64, values)
    85  		bldr.AppendValues(values, nil)
    86  		return bldr.NewArray()
    87  	case arrow.UINT16:
    88  		bldr := array.NewUint16Builder(memory.DefaultAllocator)
    89  		defer bldr.Release()
    90  		values := make([]uint16, size)
    91  		FillRandomUint16(0, 0, 64, values)
    92  		bldr.AppendValues(values, nil)
    93  		return bldr.NewArray()
    94  	case arrow.INT8:
    95  		bldr := array.NewInt8Builder(memory.DefaultAllocator)
    96  		defer bldr.Release()
    97  		values := make([]int8, size)
    98  		FillRandomInt8(0, 0, 64, values)
    99  		bldr.AppendValues(values, nil)
   100  		return bldr.NewArray()
   101  	case arrow.UINT8:
   102  		bldr := array.NewUint8Builder(memory.DefaultAllocator)
   103  		defer bldr.Release()
   104  		values := make([]uint8, size)
   105  		FillRandomUint8(0, 0, 64, values)
   106  		bldr.AppendValues(values, nil)
   107  		return bldr.NewArray()
   108  	case arrow.DATE32:
   109  		bldr := array.NewDate32Builder(memory.DefaultAllocator)
   110  		defer bldr.Release()
   111  		values := make([]int32, size)
   112  		FillRandomInt32Max(0, 24, values)
   113  
   114  		dates := make([]arrow.Date32, size)
   115  		for idx, val := range values {
   116  			dates[idx] = arrow.Date32(val) * 86400000
   117  		}
   118  		bldr.AppendValues(dates, nil)
   119  		return bldr.NewArray()
   120  	case arrow.DATE64:
   121  		bldr := array.NewDate64Builder(memory.DefaultAllocator)
   122  		defer bldr.Release()
   123  		values := make([]int64, size)
   124  		FillRandomInt64Max(0, 24, values)
   125  
   126  		dates := make([]arrow.Date64, size)
   127  		for idx, val := range values {
   128  			dates[idx] = arrow.Date64(val) * 86400000
   129  		}
   130  		bldr.AppendValues(dates, nil)
   131  		return bldr.NewArray()
   132  	case arrow.STRING:
   133  		bldr := array.NewStringBuilder(memory.DefaultAllocator)
   134  		defer bldr.Release()
   135  		for i := 0; i < size; i++ {
   136  			bldr.Append("test-string")
   137  		}
   138  		return bldr.NewArray()
   139  	case arrow.BINARY:
   140  		bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
   141  		defer bldr.Release()
   142  
   143  		buf := make([]byte, 12)
   144  		r := rand.New(rand.NewSource(0))
   145  		for i := 0; i < size; i++ {
   146  			length := r.Intn(12-2+1) + 2
   147  			r.Read(buf[:length])
   148  			bldr.Append(buf[:length])
   149  		}
   150  		return bldr.NewArray()
   151  	case arrow.FIXED_SIZE_BINARY:
   152  		bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   153  		defer bldr.Release()
   154  
   155  		buf := make([]byte, 10)
   156  		r := rand.New(rand.NewSource(0))
   157  		for i := 0; i < size; i++ {
   158  			r.Read(buf)
   159  			bldr.Append(buf)
   160  		}
   161  		return bldr.NewArray()
   162  	case arrow.DECIMAL:
   163  		dectype := dt.(*arrow.Decimal128Type)
   164  		bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype)
   165  		defer bldr.Release()
   166  
   167  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   168  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil)
   169  		return bldr.NewArray()
   170  	case arrow.BOOL:
   171  		bldr := array.NewBooleanBuilder(memory.DefaultAllocator)
   172  		defer bldr.Release()
   173  
   174  		values := make([]bool, size)
   175  		FillRandomBooleans(0.5, 0, values)
   176  		bldr.AppendValues(values, nil)
   177  		return bldr.NewArray()
   178  	}
   179  	return nil
   180  }
   181  
   182  // RandomNullable generates a random arrow array of length size with approximately numNulls,
   183  // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules
   184  // as described in the docs for RandomNonNull.
   185  func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array {
   186  	switch dt.ID() {
   187  	case arrow.FLOAT32:
   188  		bldr := array.NewFloat32Builder(memory.DefaultAllocator)
   189  		defer bldr.Release()
   190  		values := make([]float32, size)
   191  		FillRandomFloat32(0, values)
   192  
   193  		valid := make([]bool, size)
   194  		for idx := range valid {
   195  			valid[idx] = true
   196  		}
   197  		for i := 0; i < numNulls; i++ {
   198  			valid[i*2] = false
   199  		}
   200  		bldr.AppendValues(values, valid)
   201  		return bldr.NewArray()
   202  	case arrow.FLOAT64:
   203  		bldr := array.NewFloat64Builder(memory.DefaultAllocator)
   204  		defer bldr.Release()
   205  		values := make([]float64, size)
   206  		FillRandomFloat64(0, values)
   207  
   208  		valid := make([]bool, size)
   209  		for idx := range valid {
   210  			valid[idx] = true
   211  		}
   212  		for i := 0; i < numNulls; i++ {
   213  			valid[i*2] = false
   214  		}
   215  		bldr.AppendValues(values, valid)
   216  		return bldr.NewArray()
   217  	case arrow.INT8:
   218  		bldr := array.NewInt8Builder(memory.DefaultAllocator)
   219  		defer bldr.Release()
   220  		values := make([]int8, size)
   221  		FillRandomInt8(0, 0, 64, values)
   222  		valid := make([]bool, size)
   223  		for idx := range valid {
   224  			valid[idx] = true
   225  		}
   226  		for i := 0; i < numNulls; i++ {
   227  			valid[i*2] = false
   228  		}
   229  
   230  		bldr.AppendValues(values, valid)
   231  		return bldr.NewArray()
   232  	case arrow.UINT8:
   233  		bldr := array.NewUint8Builder(memory.DefaultAllocator)
   234  		defer bldr.Release()
   235  		values := make([]uint8, size)
   236  		FillRandomUint8(0, 0, 64, values)
   237  		valid := make([]bool, size)
   238  		for idx := range valid {
   239  			valid[idx] = true
   240  		}
   241  		for i := 0; i < numNulls; i++ {
   242  			valid[i*2] = false
   243  		}
   244  
   245  		bldr.AppendValues(values, valid)
   246  		return bldr.NewArray()
   247  	case arrow.INT16:
   248  		bldr := array.NewInt16Builder(memory.DefaultAllocator)
   249  		defer bldr.Release()
   250  		values := make([]int16, size)
   251  		FillRandomInt16(0, 0, 64, values)
   252  		valid := make([]bool, size)
   253  		for idx := range valid {
   254  			valid[idx] = true
   255  		}
   256  		for i := 0; i < numNulls; i++ {
   257  			valid[i*2] = false
   258  		}
   259  
   260  		bldr.AppendValues(values, valid)
   261  		return bldr.NewArray()
   262  	case arrow.UINT16:
   263  		bldr := array.NewUint16Builder(memory.DefaultAllocator)
   264  		defer bldr.Release()
   265  		values := make([]uint16, size)
   266  		FillRandomUint16(0, 0, 64, values)
   267  		valid := make([]bool, size)
   268  		for idx := range valid {
   269  			valid[idx] = true
   270  		}
   271  		for i := 0; i < numNulls; i++ {
   272  			valid[i*2] = false
   273  		}
   274  
   275  		bldr.AppendValues(values, valid)
   276  		return bldr.NewArray()
   277  	case arrow.INT32:
   278  		bldr := array.NewInt32Builder(memory.DefaultAllocator)
   279  		defer bldr.Release()
   280  		values := make([]int32, size)
   281  		FillRandomInt32Max(0, 64, values)
   282  		valid := make([]bool, size)
   283  		for idx := range valid {
   284  			valid[idx] = true
   285  		}
   286  		for i := 0; i < numNulls; i++ {
   287  			valid[i*2] = false
   288  		}
   289  
   290  		bldr.AppendValues(values, valid)
   291  		return bldr.NewArray()
   292  	case arrow.UINT32:
   293  		bldr := array.NewUint32Builder(memory.DefaultAllocator)
   294  		defer bldr.Release()
   295  		values := make([]uint32, size)
   296  		FillRandomUint32Max(0, 64, values)
   297  		valid := make([]bool, size)
   298  		for idx := range valid {
   299  			valid[idx] = true
   300  		}
   301  		for i := 0; i < numNulls; i++ {
   302  			valid[i*2] = false
   303  		}
   304  
   305  		bldr.AppendValues(values, valid)
   306  		return bldr.NewArray()
   307  
   308  	case arrow.INT64:
   309  		bldr := array.NewInt64Builder(memory.DefaultAllocator)
   310  		defer bldr.Release()
   311  		values := make([]int64, size)
   312  		FillRandomInt64Max(0, 64, values)
   313  		valid := make([]bool, size)
   314  		for idx := range valid {
   315  			valid[idx] = true
   316  		}
   317  		for i := 0; i < numNulls; i++ {
   318  			valid[i*2] = false
   319  		}
   320  
   321  		bldr.AppendValues(values, valid)
   322  		return bldr.NewArray()
   323  	case arrow.UINT64:
   324  		bldr := array.NewUint64Builder(memory.DefaultAllocator)
   325  		defer bldr.Release()
   326  		values := make([]uint64, size)
   327  		FillRandomUint64Max(0, 64, values)
   328  		valid := make([]bool, size)
   329  		for idx := range valid {
   330  			valid[idx] = true
   331  		}
   332  		for i := 0; i < numNulls; i++ {
   333  			valid[i*2] = false
   334  		}
   335  
   336  		bldr.AppendValues(values, valid)
   337  		return bldr.NewArray()
   338  	case arrow.DATE32:
   339  		bldr := array.NewDate32Builder(memory.DefaultAllocator)
   340  		defer bldr.Release()
   341  		values := make([]int32, size)
   342  		FillRandomInt32Max(0, 24, values)
   343  
   344  		dates := make([]arrow.Date32, size)
   345  		for idx, val := range values {
   346  			dates[idx] = arrow.Date32(val) * 86400000
   347  		}
   348  		valid := make([]bool, size)
   349  		for idx := range valid {
   350  			valid[idx] = true
   351  		}
   352  		for i := 0; i < numNulls; i++ {
   353  			valid[i*2] = false
   354  		}
   355  		bldr.AppendValues(dates, valid)
   356  		return bldr.NewArray()
   357  	case arrow.DATE64:
   358  		bldr := array.NewDate64Builder(memory.DefaultAllocator)
   359  		defer bldr.Release()
   360  		values := make([]int64, size)
   361  		FillRandomInt64Max(0, 24, values)
   362  
   363  		dates := make([]arrow.Date64, size)
   364  		for idx, val := range values {
   365  			dates[idx] = arrow.Date64(val) * 86400000
   366  		}
   367  		valid := make([]bool, size)
   368  		for idx := range valid {
   369  			valid[idx] = true
   370  		}
   371  		for i := 0; i < numNulls; i++ {
   372  			valid[i*2] = false
   373  		}
   374  		bldr.AppendValues(dates, valid)
   375  		return bldr.NewArray()
   376  	case arrow.BINARY:
   377  		bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
   378  		defer bldr.Release()
   379  
   380  		valid := make([]bool, size)
   381  		for idx := range valid {
   382  			valid[idx] = true
   383  		}
   384  		for i := 0; i < numNulls; i++ {
   385  			valid[i*2] = false
   386  		}
   387  
   388  		buf := make([]byte, 12)
   389  		r := rand.New(rand.NewSource(0))
   390  		for i := 0; i < size; i++ {
   391  			if !valid[i] {
   392  				bldr.AppendNull()
   393  				continue
   394  			}
   395  
   396  			length := r.Intn(12-2+1) + 2
   397  			r.Read(buf[:length])
   398  			bldr.Append(buf[:length])
   399  		}
   400  		return bldr.NewArray()
   401  	case arrow.STRING:
   402  		bldr := array.NewStringBuilder(memory.DefaultAllocator)
   403  		defer bldr.Release()
   404  
   405  		valid := make([]bool, size)
   406  		for idx := range valid {
   407  			valid[idx] = true
   408  		}
   409  		for i := 0; i < numNulls; i++ {
   410  			valid[i*2] = false
   411  		}
   412  
   413  		buf := make([]byte, 12)
   414  		r := rand.New(rand.NewSource(0))
   415  		for i := 0; i < size; i++ {
   416  			if !valid[i] {
   417  				bldr.AppendNull()
   418  				continue
   419  			}
   420  
   421  			length := r.Intn(12-2+1) + 2
   422  			r.Read(buf[:length])
   423  			// trivially force data to be valid UTF8 by making it all ASCII
   424  			for idx := range buf[:length] {
   425  				buf[idx] &= 0x7f
   426  			}
   427  			bldr.Append(string(buf[:length]))
   428  		}
   429  		return bldr.NewArray()
   430  	case arrow.FIXED_SIZE_BINARY:
   431  		bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   432  		defer bldr.Release()
   433  
   434  		valid := make([]bool, size)
   435  		for idx := range valid {
   436  			valid[idx] = true
   437  		}
   438  		for i := 0; i < numNulls; i++ {
   439  			valid[i*2] = false
   440  		}
   441  
   442  		buf := make([]byte, 10)
   443  		r := rand.New(rand.NewSource(0))
   444  		for i := 0; i < size; i++ {
   445  			if !valid[i] {
   446  				bldr.AppendNull()
   447  				continue
   448  			}
   449  
   450  			r.Read(buf)
   451  			bldr.Append(buf)
   452  		}
   453  		return bldr.NewArray()
   454  	case arrow.DECIMAL:
   455  		dectype := dt.(*arrow.Decimal128Type)
   456  		bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype)
   457  		defer bldr.Release()
   458  
   459  		valid := make([]bool, size)
   460  		for idx := range valid {
   461  			valid[idx] = true
   462  		}
   463  		for i := 0; i < numNulls; i++ {
   464  			valid[i*2] = false
   465  		}
   466  
   467  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   468  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid)
   469  		return bldr.NewArray()
   470  	case arrow.BOOL:
   471  		bldr := array.NewBooleanBuilder(memory.DefaultAllocator)
   472  		defer bldr.Release()
   473  
   474  		valid := make([]bool, size)
   475  		for idx := range valid {
   476  			valid[idx] = true
   477  		}
   478  		for i := 0; i < numNulls; i++ {
   479  			valid[i*2] = false
   480  		}
   481  
   482  		values := make([]bool, size)
   483  		FillRandomBooleans(0.5, 0, values)
   484  		bldr.AppendValues(values, valid)
   485  		return bldr.NewArray()
   486  	}
   487  	return nil
   488  }