github.com/apache/arrow/go/v16@v16.1.0/arrow/compute/expression.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 //go:build go1.18 18 19 package compute 20 21 import ( 22 "bytes" 23 "encoding/hex" 24 "errors" 25 "fmt" 26 "hash/maphash" 27 "reflect" 28 "strconv" 29 "strings" 30 31 "github.com/apache/arrow/go/v16/arrow" 32 "github.com/apache/arrow/go/v16/arrow/array" 33 "github.com/apache/arrow/go/v16/arrow/compute/exec" 34 "github.com/apache/arrow/go/v16/arrow/compute/internal/kernels" 35 "github.com/apache/arrow/go/v16/arrow/internal/debug" 36 "github.com/apache/arrow/go/v16/arrow/ipc" 37 "github.com/apache/arrow/go/v16/arrow/memory" 38 "github.com/apache/arrow/go/v16/arrow/scalar" 39 ) 40 41 var hashSeed = maphash.MakeSeed() 42 43 // Expression is an interface for mapping one datum to another. An expression 44 // is one of: 45 // 46 // A literal Datum 47 // A reference to a single (potentially nested) field of an input Datum 48 // A call to a compute function, with arguments specified by other Expressions 49 // 50 // Deprecated: use substrait-go expressions instead. 51 type Expression interface { 52 fmt.Stringer 53 // IsBound returns true if this expression has been bound to a particular 54 // Datum and/or Schema. 55 IsBound() bool 56 // IsScalarExpr returns true if this expression is composed only of scalar 57 // literals, field references and calls to scalar functions. 58 IsScalarExpr() bool 59 // IsNullLiteral returns true if this expression is a literal and entirely 60 // null. 61 IsNullLiteral() bool 62 // IsSatisfiable returns true if this expression could evaluate to true 63 IsSatisfiable() bool 64 // FieldRef returns a pointer to the underlying field reference, or nil if 65 // this expression is not a field reference. 66 FieldRef() *FieldRef 67 // Type returns the datatype this expression will evaluate to. 68 Type() arrow.DataType 69 70 Hash() uint64 71 Equals(Expression) bool 72 73 // Release releases the underlying bound C++ memory that is allocated when 74 // a Bind is performed. Any bound expression should get released to ensure 75 // no memory leaks. 76 Release() 77 } 78 79 func printDatum(datum Datum) string { 80 switch datum := datum.(type) { 81 case *ScalarDatum: 82 if !datum.Value.IsValid() { 83 return "null" 84 } 85 86 switch datum.Type().ID() { 87 case arrow.STRING, arrow.LARGE_STRING: 88 return strconv.Quote(datum.Value.(scalar.BinaryScalar).String()) 89 case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.LARGE_BINARY: 90 return `"` + strings.ToUpper(hex.EncodeToString(datum.Value.(scalar.BinaryScalar).Data())) + `"` 91 } 92 93 return datum.Value.String() 94 default: 95 return datum.String() 96 } 97 } 98 99 // Literal is an expression denoting a literal Datum which could be any value 100 // as a scalar, an array, or so on. 101 // 102 // Deprecated: use substrait-go expressions Literal instead. 103 type Literal struct { 104 Literal Datum 105 } 106 107 func (Literal) FieldRef() *FieldRef { return nil } 108 func (l *Literal) String() string { return printDatum(l.Literal) } 109 func (l *Literal) Type() arrow.DataType { return l.Literal.(ArrayLikeDatum).Type() } 110 func (l *Literal) IsBound() bool { return l.Type() != nil } 111 func (l *Literal) IsScalarExpr() bool { return l.Literal.Kind() == KindScalar } 112 113 func (l *Literal) Equals(other Expression) bool { 114 if rhs, ok := other.(*Literal); ok { 115 return l.Literal.Equals(rhs.Literal) 116 } 117 return false 118 } 119 120 func (l *Literal) IsNullLiteral() bool { 121 if ad, ok := l.Literal.(ArrayLikeDatum); ok { 122 return ad.NullN() == ad.Len() 123 } 124 return true 125 } 126 127 func (l *Literal) IsSatisfiable() bool { 128 if l.IsNullLiteral() { 129 return false 130 } 131 132 if sc, ok := l.Literal.(*ScalarDatum); ok && sc.Type().ID() == arrow.BOOL { 133 return sc.Value.(*scalar.Boolean).Value 134 } 135 136 return true 137 } 138 139 func (l *Literal) Hash() uint64 { 140 if l.IsScalarExpr() { 141 return scalar.Hash(hashSeed, l.Literal.(*ScalarDatum).Value) 142 } 143 return 0 144 } 145 146 func (l *Literal) Release() { 147 l.Literal.Release() 148 } 149 150 // Parameter represents a field reference and needs to be bound in order to determine 151 // its type and shape. 152 // 153 // Deprecated: use substrait-go field references instead. 154 type Parameter struct { 155 ref *FieldRef 156 157 // post bind props 158 dt arrow.DataType 159 index int 160 } 161 162 func (Parameter) IsNullLiteral() bool { return false } 163 func (p *Parameter) Type() arrow.DataType { return p.dt } 164 func (p *Parameter) IsBound() bool { return p.Type() != nil } 165 func (p *Parameter) IsScalarExpr() bool { return p.ref != nil } 166 func (p *Parameter) IsSatisfiable() bool { return p.Type() == nil || p.Type().ID() != arrow.NULL } 167 func (p *Parameter) FieldRef() *FieldRef { return p.ref } 168 func (p *Parameter) Hash() uint64 { return p.ref.Hash(hashSeed) } 169 170 func (p *Parameter) String() string { 171 switch { 172 case p.ref.IsName(): 173 return p.ref.Name() 174 case p.ref.IsFieldPath(): 175 return p.ref.FieldPath().String() 176 default: 177 return p.ref.String() 178 } 179 } 180 181 func (p *Parameter) Equals(other Expression) bool { 182 if rhs, ok := other.(*Parameter); ok { 183 return p.ref.Equals(*rhs.ref) 184 } 185 186 return false 187 } 188 189 func (p *Parameter) Release() {} 190 191 type comparisonType int8 192 193 const ( 194 compNA comparisonType = 0 195 compEQ comparisonType = 1 196 compLT comparisonType = 2 197 compGT comparisonType = 4 198 compNE comparisonType = compLT | compGT 199 compLE comparisonType = compLT | compEQ 200 compGE comparisonType = compGT | compEQ 201 ) 202 203 //lint:ignore U1000 ignore that this is unused for now 204 func (c comparisonType) name() string { 205 switch c { 206 case compEQ: 207 return "equal" 208 case compLT: 209 return "less" 210 case compGT: 211 return "greater" 212 case compNE: 213 return "not_equal" 214 case compLE: 215 return "less_equal" 216 case compGE: 217 return "greater_equal" 218 } 219 return "na" 220 } 221 222 func (c comparisonType) getOp() string { 223 switch c { 224 case compEQ: 225 return "==" 226 case compLT: 227 return "<" 228 case compGT: 229 return ">" 230 case compNE: 231 return "!=" 232 case compLE: 233 return "<=" 234 case compGE: 235 return ">=" 236 } 237 debug.Assert(false, "invalid getop") 238 return "" 239 } 240 241 var compmap = map[string]comparisonType{ 242 "equal": compEQ, 243 "less": compLT, 244 "greater": compGT, 245 "not_equal": compNE, 246 "less_equal": compLE, 247 "greater_equal": compGE, 248 } 249 250 func optionsToString(fn FunctionOptions) string { 251 if s, ok := fn.(fmt.Stringer); ok { 252 return s.String() 253 } 254 255 var b strings.Builder 256 v := reflect.Indirect(reflect.ValueOf(fn)) 257 b.WriteByte('{') 258 for i := 0; i < v.Type().NumField(); i++ { 259 fld := v.Type().Field(i) 260 tag := fld.Tag.Get("compute") 261 if tag == "-" { 262 continue 263 } 264 265 fldVal := v.Field(i) 266 fmt.Fprintf(&b, "%s=%v, ", tag, fldVal.Interface()) 267 } 268 ret := b.String() 269 return ret[:len(ret)-2] + "}" 270 } 271 272 // Call is a function call with specific arguments which are themselves other 273 // expressions. A call can also have options that are specific to the function 274 // in question. It must be bound to determine the shape and type. 275 // 276 // Deprecated: use substrait-go expression functions instead. 277 type Call struct { 278 funcName string 279 args []Expression 280 dt arrow.DataType 281 options FunctionOptions 282 283 cachedHash uint64 284 } 285 286 func (c *Call) IsNullLiteral() bool { return false } 287 func (c *Call) FieldRef() *FieldRef { return nil } 288 func (c *Call) Type() arrow.DataType { return c.dt } 289 func (c *Call) IsSatisfiable() bool { return c.Type() == nil || c.Type().ID() != arrow.NULL } 290 291 func (c *Call) String() string { 292 binary := func(op string) string { 293 return "(" + c.args[0].String() + " " + op + " " + c.args[1].String() + ")" 294 } 295 296 if cmp, ok := compmap[c.funcName]; ok { 297 return binary(cmp.getOp()) 298 } 299 300 const kleene = "_kleene" 301 if strings.HasSuffix(c.funcName, kleene) { 302 return binary(strings.TrimSuffix(c.funcName, kleene)) 303 } 304 305 if c.funcName == "make_struct" && c.options != nil { 306 opts := c.options.(*MakeStructOptions) 307 out := "{" 308 for i, a := range c.args { 309 out += opts.FieldNames[i] + "=" + a.String() + ", " 310 } 311 return out[:len(out)-2] + "}" 312 } 313 314 var b strings.Builder 315 b.WriteString(c.funcName + "(") 316 for _, a := range c.args { 317 b.WriteString(a.String() + ", ") 318 } 319 320 if c.options != nil { 321 b.WriteString(optionsToString(c.options)) 322 b.WriteString(" ") 323 } 324 325 ret := b.String() 326 return ret[:len(ret)-2] + ")" 327 } 328 329 func (c *Call) Hash() uint64 { 330 if c.cachedHash != 0 { 331 return c.cachedHash 332 } 333 334 var h maphash.Hash 335 h.SetSeed(hashSeed) 336 337 h.WriteString(c.funcName) 338 c.cachedHash = h.Sum64() 339 for _, arg := range c.args { 340 c.cachedHash = exec.HashCombine(c.cachedHash, arg.Hash()) 341 } 342 return c.cachedHash 343 } 344 345 func (c *Call) IsScalarExpr() bool { 346 for _, arg := range c.args { 347 if !arg.IsScalarExpr() { 348 return false 349 } 350 } 351 352 return false 353 // return isFuncScalar(c.funcName) 354 } 355 356 func (c *Call) IsBound() bool { 357 return c.Type() != nil 358 } 359 360 func (c *Call) Equals(other Expression) bool { 361 rhs, ok := other.(*Call) 362 if !ok { 363 return false 364 } 365 366 if c.funcName != rhs.funcName || len(c.args) != len(rhs.args) { 367 return false 368 } 369 370 for i := range c.args { 371 if !c.args[i].Equals(rhs.args[i]) { 372 return false 373 } 374 } 375 376 if opt, ok := c.options.(FunctionOptionsEqual); ok { 377 return opt.Equals(rhs.options) 378 } 379 return reflect.DeepEqual(c.options, rhs.options) 380 } 381 382 func (c *Call) Release() { 383 for _, a := range c.args { 384 a.Release() 385 } 386 if r, ok := c.options.(releasable); ok { 387 r.Release() 388 } 389 } 390 391 // FunctionOptions can be any type which has a TypeName function. The fields 392 // of the type will be used (via reflection) to determine the information to 393 // propagate when serializing to pass to the C++ for execution. 394 type FunctionOptions interface { 395 TypeName() string 396 } 397 398 type FunctionOptionsEqual interface { 399 Equals(FunctionOptions) bool 400 } 401 402 type FunctionOptionsCloneable interface { 403 Clone() FunctionOptions 404 } 405 406 type MakeStructOptions struct { 407 FieldNames []string `compute:"field_names"` 408 FieldNullability []bool `compute:"field_nullability"` 409 FieldMetadata []*arrow.Metadata `compute:"field_metadata"` 410 } 411 412 func (MakeStructOptions) TypeName() string { return "MakeStructOptions" } 413 414 type NullOptions struct { 415 NanIsNull bool `compute:"nan_is_null"` 416 } 417 418 func (NullOptions) TypeName() string { return "NullOptions" } 419 420 type StrptimeOptions struct { 421 Format string `compute:"format"` 422 Unit arrow.TimeUnit `compute:"unit"` 423 } 424 425 func (StrptimeOptions) TypeName() string { return "StrptimeOptions" } 426 427 type NullSelectionBehavior = kernels.NullSelectionBehavior 428 429 const ( 430 SelectionEmitNulls = kernels.EmitNulls 431 SelectionDropNulls = kernels.DropNulls 432 ) 433 434 type ArithmeticOptions struct { 435 NoCheckOverflow bool `compute:"check_overflow"` 436 } 437 438 func (ArithmeticOptions) TypeName() string { return "ArithmeticOptions" } 439 440 type ( 441 CastOptions = kernels.CastOptions 442 FilterOptions = kernels.FilterOptions 443 TakeOptions = kernels.TakeOptions 444 ) 445 446 func DefaultFilterOptions() *FilterOptions { return &FilterOptions{} } 447 448 func DefaultTakeOptions() *TakeOptions { return &TakeOptions{BoundsCheck: true} } 449 450 func DefaultCastOptions(safe bool) *CastOptions { 451 if safe { 452 return &CastOptions{} 453 } 454 return &CastOptions{ 455 AllowIntOverflow: true, 456 AllowTimeTruncate: true, 457 AllowTimeOverflow: true, 458 AllowDecimalTruncate: true, 459 AllowFloatTruncate: true, 460 AllowInvalidUtf8: true, 461 } 462 } 463 464 func UnsafeCastOptions(dt arrow.DataType) *CastOptions { 465 return NewCastOptions(dt, false) 466 } 467 468 func SafeCastOptions(dt arrow.DataType) *CastOptions { 469 return NewCastOptions(dt, true) 470 } 471 472 func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions { 473 opts := DefaultCastOptions(safe) 474 if dt != nil { 475 opts.ToType = dt 476 } else { 477 opts.ToType = arrow.Null 478 } 479 return opts 480 } 481 482 func Cast(ex Expression, dt arrow.DataType) Expression { 483 opts := &CastOptions{} 484 if dt == nil { 485 opts.ToType = arrow.Null 486 } else { 487 opts.ToType = dt 488 } 489 490 return NewCall("cast", []Expression{ex}, opts) 491 } 492 493 type SetLookupOptions struct { 494 ValueSet Datum `compute:"value_set"` 495 SkipNulls bool `compute:"skip_nulls"` 496 } 497 498 func (SetLookupOptions) TypeName() string { return "SetLookupOptions" } 499 500 func (s *SetLookupOptions) Release() { s.ValueSet.Release() } 501 502 func (s *SetLookupOptions) Equals(other FunctionOptions) bool { 503 rhs, ok := other.(*SetLookupOptions) 504 if !ok { 505 return false 506 } 507 508 return s.SkipNulls == rhs.SkipNulls && s.ValueSet.Equals(rhs.ValueSet) 509 } 510 511 func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error { 512 if v, err := sc.Field("skip_nulls"); err == nil { 513 s.SkipNulls = v.(*scalar.Boolean).Value 514 } 515 516 value, err := sc.Field("value_set") 517 if err != nil { 518 return err 519 } 520 521 if v, ok := value.(scalar.ListScalar); ok { 522 s.ValueSet = NewDatum(v.GetList()) 523 return nil 524 } 525 526 return errors.New("set lookup options valueset should be a list") 527 } 528 529 var ( 530 funcOptionsMap map[string]reflect.Type 531 funcOptsTypes = []FunctionOptions{ 532 SetLookupOptions{}, ArithmeticOptions{}, CastOptions{}, 533 FilterOptions{}, NullOptions{}, StrptimeOptions{}, MakeStructOptions{}, 534 } 535 ) 536 537 func init() { 538 funcOptionsMap = make(map[string]reflect.Type) 539 for _, ft := range funcOptsTypes { 540 funcOptionsMap[ft.TypeName()] = reflect.TypeOf(ft) 541 } 542 } 543 544 // NewLiteral constructs a new literal expression from any value. It is passed 545 // to NewDatum which will construct the appropriate Datum and/or scalar 546 // value for the type provided. 547 func NewLiteral(arg interface{}) Expression { 548 return &Literal{Literal: NewDatum(arg)} 549 } 550 551 func NullLiteral(dt arrow.DataType) Expression { 552 return &Literal{Literal: NewDatum(scalar.MakeNullScalar(dt))} 553 } 554 555 // NewRef constructs a parameter expression which refers to a specific field 556 func NewRef(ref FieldRef) Expression { 557 return &Parameter{ref: &ref, index: -1} 558 } 559 560 // NewFieldRef is shorthand for NewRef(FieldRefName(field)) 561 func NewFieldRef(field string) Expression { 562 return NewRef(FieldRefName(field)) 563 } 564 565 // NewCall constructs an expression that represents a specific function call with 566 // the given arguments and options. 567 func NewCall(name string, args []Expression, opts FunctionOptions) Expression { 568 return &Call{funcName: name, args: args, options: opts} 569 } 570 571 // Project is shorthand for `make_struct` to produce a record batch output 572 // from a group of expressions. 573 func Project(values []Expression, names []string) Expression { 574 nulls := make([]bool, len(names)) 575 for i := range nulls { 576 nulls[i] = true 577 } 578 meta := make([]*arrow.Metadata, len(names)) 579 return NewCall("make_struct", values, 580 &MakeStructOptions{FieldNames: names, FieldNullability: nulls, FieldMetadata: meta}) 581 } 582 583 // Equal is a convenience function for the equal function 584 func Equal(lhs, rhs Expression) Expression { 585 return NewCall("equal", []Expression{lhs, rhs}, nil) 586 } 587 588 // NotEqual creates a call to not_equal 589 func NotEqual(lhs, rhs Expression) Expression { 590 return NewCall("not_equal", []Expression{lhs, rhs}, nil) 591 } 592 593 // Less is shorthand for NewCall("less",....) 594 func Less(lhs, rhs Expression) Expression { 595 return NewCall("less", []Expression{lhs, rhs}, nil) 596 } 597 598 // LessEqual is shorthand for NewCall("less_equal",....) 599 func LessEqual(lhs, rhs Expression) Expression { 600 return NewCall("less_equal", []Expression{lhs, rhs}, nil) 601 } 602 603 // Greater is shorthand for NewCall("greater",....) 604 func Greater(lhs, rhs Expression) Expression { 605 return NewCall("greater", []Expression{lhs, rhs}, nil) 606 } 607 608 // GreaterEqual is shorthand for NewCall("greater_equal",....) 609 func GreaterEqual(lhs, rhs Expression) Expression { 610 return NewCall("greater_equal", []Expression{lhs, rhs}, nil) 611 } 612 613 // IsNull creates an expression that returns true if the passed in expression is 614 // null. Optionally treating NaN as null if desired. 615 func IsNull(lhs Expression, nanIsNull bool) Expression { 616 return NewCall("less", []Expression{lhs}, &NullOptions{nanIsNull}) 617 } 618 619 // IsValid is the inverse of IsNull 620 func IsValid(lhs Expression) Expression { 621 return NewCall("is_valid", []Expression{lhs}, nil) 622 } 623 624 type binop func(lhs, rhs Expression) Expression 625 626 func foldLeft(op binop, args ...Expression) Expression { 627 switch len(args) { 628 case 0: 629 return nil 630 case 1: 631 return args[0] 632 } 633 634 folded := args[0] 635 for _, a := range args[1:] { 636 folded = op(folded, a) 637 } 638 return folded 639 } 640 641 func and(lhs, rhs Expression) Expression { 642 return NewCall("and_kleene", []Expression{lhs, rhs}, nil) 643 } 644 645 // And constructs a tree of calls to and_kleene for boolean And logic taking 646 // an arbitrary number of values. 647 func And(lhs, rhs Expression, ops ...Expression) Expression { 648 folded := foldLeft(and, append([]Expression{lhs, rhs}, ops...)...) 649 if folded != nil { 650 return folded 651 } 652 return NewLiteral(true) 653 } 654 655 func or(lhs, rhs Expression) Expression { 656 return NewCall("or_kleene", []Expression{lhs, rhs}, nil) 657 } 658 659 // Or constructs a tree of calls to or_kleene for boolean Or logic taking 660 // an arbitrary number of values. 661 func Or(lhs, rhs Expression, ops ...Expression) Expression { 662 folded := foldLeft(or, append([]Expression{lhs, rhs}, ops...)...) 663 if folded != nil { 664 return folded 665 } 666 return NewLiteral(false) 667 } 668 669 // Not creates a call to "invert" for the value specified. 670 func Not(expr Expression) Expression { 671 return NewCall("invert", []Expression{expr}, nil) 672 } 673 674 func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error) { 675 sc, err := scalar.ToScalar(opts, mem) 676 if err != nil { 677 return nil, err 678 } 679 if sc, ok := sc.(releasable); ok { 680 defer sc.Release() 681 } 682 683 arr, err := scalar.MakeArrayFromScalar(sc, 1, mem) 684 if err != nil { 685 return nil, err 686 } 687 defer arr.Release() 688 689 batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Type: arr.DataType(), Nullable: true}}, nil), []arrow.Array{arr}, 1) 690 defer batch.Release() 691 692 buf := &bufferWriteSeeker{mem: mem} 693 wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(batch.Schema()), ipc.WithAllocator(mem)) 694 if err != nil { 695 return nil, err 696 } 697 698 wr.Write(batch) 699 wr.Close() 700 return buf.buf, nil 701 } 702 703 // SerializeExpr serializes expressions by converting them to Metadata and 704 // storing this in the schema of a Record. Embedded arrays and scalars are 705 // stored in its columns. Finally the record is written as an IPC file 706 func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error) { 707 var ( 708 cols []arrow.Array 709 metaKey []string 710 metaValue []string 711 visit func(Expression) error 712 ) 713 714 addScalar := func(s scalar.Scalar) (string, error) { 715 ret := len(cols) 716 arr, err := scalar.MakeArrayFromScalar(s, 1, mem) 717 if err != nil { 718 return "", err 719 } 720 cols = append(cols, arr) 721 return strconv.Itoa(ret), nil 722 } 723 724 visit = func(e Expression) error { 725 switch e := e.(type) { 726 case *Literal: 727 if !e.IsScalarExpr() { 728 return errors.New("not implemented: serialization of non-scalar literals") 729 } 730 metaKey = append(metaKey, "literal") 731 s, err := addScalar(e.Literal.(*ScalarDatum).Value) 732 if err != nil { 733 return err 734 } 735 metaValue = append(metaValue, s) 736 case *Parameter: 737 if e.ref.Name() == "" { 738 return errors.New("not implemented: serialization of non-name field_ref") 739 } 740 741 metaKey = append(metaKey, "field_ref") 742 metaValue = append(metaValue, e.ref.Name()) 743 case *Call: 744 metaKey = append(metaKey, "call") 745 metaValue = append(metaValue, e.funcName) 746 747 for _, arg := range e.args { 748 visit(arg) 749 } 750 751 if e.options != nil { 752 st, err := scalar.ToScalar(e.options, mem) 753 if err != nil { 754 return err 755 } 756 metaKey = append(metaKey, "options") 757 s, err := addScalar(st) 758 if err != nil { 759 return err 760 } 761 metaValue = append(metaValue, s) 762 763 for _, f := range st.(*scalar.Struct).Value { 764 switch s := f.(type) { 765 case releasable: 766 defer s.Release() 767 } 768 } 769 } 770 771 metaKey = append(metaKey, "end") 772 metaValue = append(metaValue, e.funcName) 773 } 774 return nil 775 } 776 777 if err := visit(expr); err != nil { 778 return nil, err 779 } 780 781 fields := make([]arrow.Field, len(cols)) 782 for i, c := range cols { 783 fields[i].Type = c.DataType() 784 defer c.Release() 785 } 786 787 metadata := arrow.NewMetadata(metaKey, metaValue) 788 rec := array.NewRecord(arrow.NewSchema(fields, &metadata), cols, 1) 789 defer rec.Release() 790 791 buf := &bufferWriteSeeker{mem: mem} 792 wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(rec.Schema()), ipc.WithAllocator(mem)) 793 if err != nil { 794 return nil, err 795 } 796 797 wr.Write(rec) 798 wr.Close() 799 return buf.buf, nil 800 } 801 802 func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error) { 803 rdr, err := ipc.NewFileReader(bytes.NewReader(buf.Bytes()), ipc.WithAllocator(mem)) 804 if err != nil { 805 return nil, err 806 } 807 defer rdr.Close() 808 809 batch, err := rdr.Read() 810 if err != nil { 811 return nil, err 812 } 813 814 if !batch.Schema().HasMetadata() { 815 return nil, errors.New("serialized Expression's batch repr had no metadata") 816 } 817 818 if batch.NumRows() != 1 { 819 return nil, fmt.Errorf("serialized Expression's batch repr was not a single row - had %d", batch.NumRows()) 820 } 821 822 var ( 823 getone func() (Expression, error) 824 index int = 0 825 metadata = batch.Schema().Metadata() 826 ) 827 828 getscalar := func(i string) (scalar.Scalar, error) { 829 colIndex, err := strconv.ParseInt(i, 10, 32) 830 if err != nil { 831 return nil, err 832 } 833 if colIndex >= batch.NumCols() { 834 return nil, errors.New("column index out of bounds") 835 } 836 return scalar.GetScalar(batch.Column(int(colIndex)), 0) 837 } 838 839 getone = func() (Expression, error) { 840 if index >= metadata.Len() { 841 return nil, errors.New("unterminated serialized Expression") 842 } 843 844 key, val := metadata.Keys()[index], metadata.Values()[index] 845 index++ 846 847 switch key { 848 case "literal": 849 scalar, err := getscalar(val) 850 if err != nil { 851 return nil, err 852 } 853 if r, ok := scalar.(releasable); ok { 854 defer r.Release() 855 } 856 return NewLiteral(scalar), err 857 case "field_ref": 858 return NewFieldRef(val), nil 859 case "call": 860 args := make([]Expression, 0) 861 for metadata.Keys()[index] != "end" { 862 if metadata.Keys()[index] == "options" { 863 optsScalar, err := getscalar(metadata.Values()[index]) 864 if err != nil { 865 return nil, err 866 } 867 if r, ok := optsScalar.(releasable); ok { 868 defer r.Release() 869 } 870 var opts FunctionOptions 871 if optsScalar != nil { 872 typname, err := optsScalar.(*scalar.Struct).Field("_type_name") 873 if err != nil { 874 return nil, err 875 } 876 if typname.DataType().ID() != arrow.BINARY { 877 return nil, errors.New("options scalar typename must be binary") 878 } 879 880 optionsVal := reflect.New(funcOptionsMap[string(typname.(*scalar.Binary).Data())]).Interface() 881 if err := scalar.FromScalar(optsScalar.(*scalar.Struct), optionsVal); err != nil { 882 return nil, err 883 } 884 opts = optionsVal.(FunctionOptions) 885 } 886 index += 2 887 return NewCall(val, args, opts), nil 888 } 889 890 arg, err := getone() 891 if err != nil { 892 return nil, err 893 } 894 args = append(args, arg) 895 } 896 index++ 897 return NewCall(val, args, nil), nil 898 default: 899 return nil, fmt.Errorf("unrecognized serialized Expression key %s", key) 900 } 901 } 902 903 return getone() 904 }