github.com/apache/arrow/go/v14@v14.0.1/arrow/compute/fieldref_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package compute_test
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/apache/arrow/go/v14/arrow"
    23  	"github.com/apache/arrow/go/v14/arrow/array"
    24  	"github.com/apache/arrow/go/v14/arrow/compute"
    25  	"github.com/apache/arrow/go/v14/arrow/memory"
    26  	"github.com/stretchr/testify/assert"
    27  )
    28  
    29  func TestFieldPathBasics(t *testing.T) {
    30  	f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
    31  	f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
    32  	f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
    33  	f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
    34  
    35  	s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil)
    36  
    37  	for i := range s.Fields() {
    38  		f, err := compute.FieldPath{i}.Get(s)
    39  		assert.NoError(t, err)
    40  		assert.Equal(t, s.Field(i), *f)
    41  	}
    42  
    43  	f, err := compute.FieldPath{}.Get(s)
    44  	assert.Nil(t, f)
    45  	assert.ErrorIs(t, err, compute.ErrEmpty)
    46  
    47  	f, err = compute.FieldPath{len(s.Fields()) * 2}.Get(s)
    48  	assert.Nil(t, f)
    49  	assert.ErrorIs(t, err, compute.ErrIndexRange)
    50  }
    51  
    52  func TestFieldRefBasics(t *testing.T) {
    53  	f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
    54  	f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
    55  	f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
    56  	f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
    57  
    58  	s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil)
    59  
    60  	// lookup by index returns Indices{index}
    61  	for i := range s.Fields() {
    62  		assert.ElementsMatch(t, []compute.FieldPath{{i}}, compute.FieldRefIndex(i).FindAll(s.Fields()))
    63  	}
    64  
    65  	// out of range index results in failure to match
    66  	assert.Empty(t, compute.FieldRefIndex(len(s.Fields())*2).FindAll(s.Fields()))
    67  
    68  	// lookup by name returns the indices of both matching fields
    69  	assert.Equal(t, []compute.FieldPath{{0}, {2}}, compute.FieldRefName("alpha").FindAll(s.Fields()))
    70  	assert.Equal(t, []compute.FieldPath{{1}, {3}}, compute.FieldRefName("beta").FindAll(s.Fields()))
    71  }
    72  
    73  func TestFieldRefDotPath(t *testing.T) {
    74  	ref, err := compute.NewFieldRefFromDotPath(`.alpha`)
    75  	assert.True(t, ref.IsName())
    76  	assert.Equal(t, "alpha", ref.Name())
    77  	assert.False(t, ref.IsFieldPath())
    78  	assert.False(t, ref.IsNested())
    79  	assert.NoError(t, err)
    80  	assert.Equal(t, compute.FieldRefName("alpha"), ref)
    81  	assert.True(t, ref.Equals(compute.FieldRefName("alpha")))
    82  
    83  	ref, err = compute.NewFieldRefFromDotPath(`..`)
    84  	assert.Empty(t, ref.Name())
    85  	assert.False(t, ref.IsName())
    86  	assert.False(t, ref.IsFieldPath())
    87  	assert.Nil(t, ref.FieldPath())
    88  	assert.True(t, ref.IsNested())
    89  	assert.NoError(t, err)
    90  	assert.Equal(t, compute.FieldRefList("", ""), ref)
    91  
    92  	ref, err = compute.NewFieldRefFromDotPath(`[2]`)
    93  	assert.False(t, ref.IsName())
    94  	assert.True(t, ref.IsFieldPath())
    95  	assert.Equal(t, compute.FieldPath{2}, ref.FieldPath())
    96  	assert.False(t, ref.IsNested())
    97  	assert.NoError(t, err)
    98  	assert.Equal(t, compute.FieldRefIndex(2), ref)
    99  
   100  	ref, err = compute.NewFieldRefFromDotPath(`.beta[3]`)
   101  	assert.NoError(t, err)
   102  	assert.Equal(t, compute.FieldRefList("beta", 3), ref)
   103  
   104  	ref, err = compute.NewFieldRefFromDotPath(`[5].gamma.delta[7]`)
   105  	assert.NoError(t, err)
   106  	assert.Equal(t, compute.FieldRefList(5, "gamma", "delta", 7), ref)
   107  
   108  	ref, err = compute.NewFieldRefFromDotPath(`.hello world`)
   109  	assert.NoError(t, err)
   110  	assert.Equal(t, compute.FieldRefName("hello world"), ref)
   111  
   112  	ref, err = compute.NewFieldRefFromDotPath(`.\[y\]\\tho\.\`)
   113  	assert.NoError(t, err)
   114  	assert.Equal(t, compute.FieldRefName(`[y]\tho.\`), ref)
   115  
   116  	_, err = compute.NewFieldRefFromDotPath(``)
   117  	assert.ErrorIs(t, err, compute.ErrInvalid)
   118  
   119  	_, err = compute.NewFieldRefFromDotPath(`alpha`)
   120  	assert.ErrorIs(t, err, compute.ErrInvalid)
   121  
   122  	_, err = compute.NewFieldRefFromDotPath(`[134234`)
   123  	assert.ErrorIs(t, err, compute.ErrInvalid)
   124  
   125  	_, err = compute.NewFieldRefFromDotPath(`[1stuf]`)
   126  	assert.ErrorIs(t, err, compute.ErrInvalid)
   127  }
   128  
   129  func TestFieldPathNested(t *testing.T) {
   130  	f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   131  	f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   132  	f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)}
   133  	f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   134  	f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   135  	f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   136  	f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)}
   137  	f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)}
   138  	s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil)
   139  
   140  	f, err := compute.FieldPath{0}.Get(s)
   141  	assert.NoError(t, err)
   142  	assert.Equal(t, f0, *f)
   143  
   144  	f, err = compute.FieldPath{0, 0}.Get(s)
   145  	assert.ErrorIs(t, err, compute.ErrNoChildren)
   146  	assert.Nil(t, f)
   147  
   148  	f, err = compute.FieldPath{1, 0}.Get(s)
   149  	assert.NoError(t, err)
   150  	assert.Equal(t, f1_0, *f)
   151  
   152  	f, err = compute.FieldPath{2, 0}.Get(s)
   153  	assert.NoError(t, err)
   154  	assert.Equal(t, f2_0, *f)
   155  
   156  	f, err = compute.FieldPath{2, 1, 0}.Get(s)
   157  	assert.NoError(t, err)
   158  	assert.Equal(t, f2_1_0, *f)
   159  
   160  	f, err = compute.FieldPath{1, 0}.GetField(s.Field(2))
   161  	assert.NoError(t, err)
   162  	assert.Equal(t, f2_1_0, *f)
   163  
   164  	f, err = compute.FieldPath{2, 1, 1}.Get(s)
   165  	assert.NoError(t, err)
   166  	assert.Equal(t, f2_1_1, *f)
   167  }
   168  
   169  func TestFindFuncs(t *testing.T) {
   170  	f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   171  	f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   172  	f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)}
   173  	f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   174  	f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   175  	f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   176  	f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)}
   177  	f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)}
   178  	s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil)
   179  
   180  	assert.Equal(t, []compute.FieldPath{{1}}, compute.FieldRefName("gamma").FindAllField(f2))
   181  	fp, err := compute.FieldRefName("alpha").FindOneOrNone(s)
   182  	assert.ErrorIs(t, err, compute.ErrMultipleMatches)
   183  	assert.Len(t, fp, 0)
   184  	fp, err = compute.FieldRefName("alpha").FindOne(s)
   185  	assert.ErrorIs(t, err, compute.ErrMultipleMatches)
   186  	assert.Len(t, fp, 0)
   187  
   188  	fp, err = compute.FieldRefName("beta").FindOneOrNone(s)
   189  	assert.NoError(t, err)
   190  	assert.Equal(t, compute.FieldPath{2}, fp)
   191  	fp, err = compute.FieldRefName("beta").FindOne(s)
   192  	assert.NoError(t, err)
   193  	assert.Equal(t, compute.FieldPath{2}, fp)
   194  
   195  	fp, err = compute.FieldRefName("gamma").FindOneOrNone(s)
   196  	assert.NoError(t, err)
   197  	assert.Len(t, fp, 0)
   198  
   199  	fp, err = compute.FieldRefName("gamma").FindOne(s)
   200  	assert.ErrorIs(t, err, compute.ErrNoMatch)
   201  	assert.Nil(t, fp)
   202  }
   203  
   204  func TestGetFieldFuncs(t *testing.T) {
   205  	f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   206  	f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   207  	f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)}
   208  	f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   209  	f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32}
   210  	f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32}
   211  	f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)}
   212  	f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)}
   213  	s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil)
   214  
   215  	ref, err := compute.NewFieldRefFromDotPath(`[2].alpha`)
   216  	assert.NoError(t, err)
   217  
   218  	f, err := ref.GetOneField(s)
   219  	assert.NoError(t, err)
   220  	assert.Equal(t, f2_0, *f)
   221  	f, err = ref.GetOneOrNone(s)
   222  	assert.NoError(t, err)
   223  	assert.Equal(t, f2_0, *f)
   224  
   225  	ref = compute.FieldRefList("beta", "gamma", 2)
   226  	f, err = ref.GetOneField(s)
   227  	assert.ErrorIs(t, err, compute.ErrNoMatch)
   228  	assert.Nil(t, f)
   229  	f, err = ref.GetOneOrNone(s)
   230  	assert.NoError(t, err)
   231  	assert.Nil(t, f)
   232  
   233  	f, err = compute.FieldRefName("alpha").GetOneOrNone(s)
   234  	assert.ErrorIs(t, err, compute.ErrMultipleMatches)
   235  	assert.Nil(t, f)
   236  }
   237  
   238  func TestFieldRefRecord(t *testing.T) {
   239  	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
   240  	defer mem.AssertSize(t, 0)
   241  
   242  	alphaBldr := array.NewInt32Builder(mem)
   243  	defer alphaBldr.Release()
   244  
   245  	betaBldr := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32)
   246  	defer betaBldr.Release()
   247  
   248  	gammaBldr := array.NewStructBuilder(mem, arrow.StructOf(
   249  		arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
   250  		arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32, Nullable: true}))
   251  	defer gammaBldr.Release()
   252  
   253  	alphaBldr.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)
   254  	betaBldr.AppendValues([]int32{0, 3, 7, 8, 8, 10, 13, 14, 17, 20, 22}, []bool{true, true, true, false, true, true, true, true, true, true})
   255  	for i := 0; i < 22; i++ {
   256  		betaBldr.ValueBuilder().(*array.Int32Builder).Append(int32(i * 2))
   257  	}
   258  
   259  	gammaBldr.AppendValues([]bool{true, true, true, true, true, true, true, true, true, true})
   260  	gammaBldr.FieldBuilder(0).(*array.Int32Builder).AppendValues([]int32{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, nil)
   261  	gammaBldr.FieldBuilder(1).(*array.Int32Builder).AppendValues([]int32{-10, -20, -30, -40, -50, -60, -70, -80, -90, -100}, nil)
   262  
   263  	alpha := alphaBldr.NewInt32Array()
   264  	defer alpha.Release()
   265  	beta := betaBldr.NewListArray()
   266  	defer beta.Release()
   267  	gamma := gammaBldr.NewStructArray()
   268  	defer gamma.Release()
   269  
   270  	rec := array.NewRecord(arrow.NewSchema([]arrow.Field{
   271  		{Name: "alpha", Type: alpha.DataType(), Nullable: true},
   272  		{Name: "alpha", Type: beta.DataType(), Nullable: true},
   273  		{Name: "alpha", Type: gamma.DataType(), Nullable: true},
   274  	}, nil), []arrow.Array{alpha, beta, gamma}, 10)
   275  	defer rec.Release()
   276  
   277  	arr, err := compute.FieldPath{2, 0}.GetColumn(rec)
   278  	assert.NoError(t, err)
   279  	assert.Same(t, gamma.Field(0), arr)
   280  
   281  	arr, err = compute.FieldPath{}.GetColumn(rec)
   282  	assert.ErrorIs(t, err, compute.ErrEmpty)
   283  	assert.Nil(t, arr)
   284  
   285  	arr, err = compute.FieldPath{1, 0}.GetColumn(rec)
   286  	assert.NoError(t, err)
   287  	assert.Same(t, beta.ListValues(), arr)
   288  
   289  	arr, err = compute.FieldPath{1, 0, 0}.GetColumn(rec)
   290  	assert.ErrorIs(t, err, compute.ErrNoChildren)
   291  	assert.Nil(t, arr)
   292  
   293  	arr, err = compute.FieldPath{2, 2}.GetColumn(rec)
   294  	assert.ErrorIs(t, err, compute.ErrIndexRange)
   295  	assert.Nil(t, arr)
   296  
   297  	arrs, err := compute.FieldRefName("alpha").GetAllColumns(rec)
   298  	assert.NoError(t, err)
   299  	assert.Equal(t, []arrow.Array{alpha, beta, gamma}, arrs)
   300  
   301  	arrs, err = compute.FieldRefName("delta").GetAllColumns(rec)
   302  	assert.NoError(t, err)
   303  	assert.Len(t, arrs, 0)
   304  
   305  	arr, err = compute.FieldRefName("delta").GetOneColumnOrNone(rec)
   306  	assert.NoError(t, err)
   307  	assert.Nil(t, arr)
   308  
   309  	arr, err = compute.FieldRefName("alpha").GetOneColumnOrNone(rec)
   310  	assert.ErrorIs(t, err, compute.ErrMultipleMatches)
   311  	assert.Nil(t, arr)
   312  
   313  	arr, err = compute.FieldRefList("alpha", "beta").GetOneColumnOrNone(rec)
   314  	assert.NoError(t, err)
   315  	assert.Same(t, gamma.Field(1), arr)
   316  }