github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/table_function/unnest.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package table_function
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/json"
    20  	"fmt"
    21  	"strconv"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    24  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    25  	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    30  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    31  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    32  )
    33  
    34  func genFilterMap(filters []string) map[string]struct{} {
    35  	if filters == nil {
    36  		return defaultFilterMap
    37  	}
    38  	filterMap := make(map[string]struct{}, len(filters))
    39  	for _, f := range filters {
    40  		filterMap[f] = struct{}{}
    41  	}
    42  	return filterMap
    43  }
    44  
    45  func unnestString(arg any, buf *bytes.Buffer) {
    46  	buf.WriteString("unnest")
    47  }
    48  
    49  func unnestPrepare(proc *process.Process, arg *Argument) error {
    50  	param := unnestParam{}
    51  	param.ColName = string(arg.Params)
    52  	if len(param.ColName) == 0 {
    53  		param.ColName = "UNNEST_DEFAULT"
    54  	}
    55  	var filters []string
    56  	for i := range arg.Attrs {
    57  		denied := false
    58  		for j := range unnestDeniedFilters {
    59  			if arg.Attrs[i] == unnestDeniedFilters[j] {
    60  				denied = true
    61  				break
    62  			}
    63  		}
    64  		if !denied {
    65  			filters = append(filters, arg.Attrs[i])
    66  		}
    67  	}
    68  	param.FilterMap = genFilterMap(filters)
    69  	if len(arg.Args) < 1 || len(arg.Args) > 3 {
    70  		return moerr.NewInvalidInput(proc.Ctx, "unnest: argument number must be 1, 2 or 3")
    71  	}
    72  	if len(arg.Args) == 1 {
    73  		vType := types.T_varchar.ToType()
    74  		bType := types.T_bool.ToType()
    75  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&vType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Sval{Sval: "$"}}}})
    76  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Bval{Bval: false}}}})
    77  	} else if len(arg.Args) == 2 {
    78  		bType := types.T_bool.ToType()
    79  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Bval{Bval: false}}}})
    80  	}
    81  	dt, err := json.Marshal(param)
    82  	if err != nil {
    83  		return err
    84  	}
    85  	arg.Params = dt
    86  	return nil
    87  }
    88  
    89  func unnestCall(_ int, proc *process.Process, arg *Argument) (bool, error) {
    90  	var (
    91  		err      error
    92  		rbat     *batch.Batch
    93  		jsonVec  *vector.Vector
    94  		pathVec  *vector.Vector
    95  		outerVec *vector.Vector
    96  		path     bytejson.Path
    97  		outer    bool
    98  	)
    99  	defer func() {
   100  		if err != nil && rbat != nil {
   101  			rbat.Clean(proc.Mp())
   102  		}
   103  		if jsonVec != nil {
   104  			jsonVec.Free(proc.Mp())
   105  		}
   106  		if pathVec != nil {
   107  			pathVec.Free(proc.Mp())
   108  		}
   109  		if outerVec != nil {
   110  			outerVec.Free(proc.Mp())
   111  		}
   112  	}()
   113  	bat := proc.InputBatch()
   114  	if bat == nil {
   115  		return true, nil
   116  	}
   117  	jsonVec, err = colexec.EvalExpr(bat, proc, arg.Args[0])
   118  	if err != nil {
   119  		return false, err
   120  	}
   121  	if jsonVec.Typ.Oid != types.T_json && jsonVec.Typ.Oid != types.T_varchar {
   122  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: first argument must be json or string, but got %s", jsonVec.Typ.String()))
   123  	}
   124  	pathVec, err = colexec.EvalExpr(bat, proc, arg.Args[1])
   125  	if err != nil {
   126  		return false, err
   127  	}
   128  	if pathVec.Typ.Oid != types.T_varchar {
   129  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: second argument must be string, but got %s", pathVec.Typ.String()))
   130  	}
   131  	outerVec, err = colexec.EvalExpr(bat, proc, arg.Args[2])
   132  	if err != nil {
   133  		return false, err
   134  	}
   135  	if outerVec.Typ.Oid != types.T_bool {
   136  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: third argument must be bool, but got %s", outerVec.Typ.String()))
   137  	}
   138  	if !pathVec.IsScalar() || !outerVec.IsScalar() {
   139  		return false, moerr.NewInvalidInput(proc.Ctx, "unnest: second and third arguments must be scalar")
   140  	}
   141  	path, err = types.ParseStringToPath(pathVec.GetString(0))
   142  	if err != nil {
   143  		return false, err
   144  	}
   145  	outer = vector.MustTCols[bool](outerVec)[0]
   146  	param := unnestParam{}
   147  	if err = json.Unmarshal(arg.Params, &param); err != nil {
   148  		return false, err
   149  	}
   150  	switch jsonVec.Typ.Oid {
   151  	case types.T_json:
   152  		rbat, err = handle(jsonVec, &path, outer, &param, arg, proc, parseJson)
   153  	case types.T_varchar:
   154  		rbat, err = handle(jsonVec, &path, outer, &param, arg, proc, parseStr)
   155  	}
   156  	if err != nil {
   157  		return false, err
   158  	}
   159  	proc.SetInputBatch(rbat)
   160  	return false, nil
   161  }
   162  
   163  func handle(jsonVec *vector.Vector, path *bytejson.Path, outer bool, param *unnestParam, arg *Argument, proc *process.Process, fn func(dt []byte) (bytejson.ByteJson, error)) (*batch.Batch, error) {
   164  	var (
   165  		err  error
   166  		rbat *batch.Batch
   167  		json bytejson.ByteJson
   168  		ures []bytejson.UnnestResult
   169  	)
   170  
   171  	rbat = batch.New(false, arg.Attrs)
   172  	rbat.Cnt = 1
   173  	for i := range arg.Rets {
   174  		rbat.Vecs[i] = vector.New(dupType(arg.Rets[i].Typ))
   175  	}
   176  
   177  	if jsonVec.IsScalar() {
   178  		json, err = fn(jsonVec.GetBytes(0))
   179  		if err != nil {
   180  			return nil, err
   181  		}
   182  		ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap)
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		rbat, err = makeBatch(rbat, ures, param, arg, proc)
   187  		if err != nil {
   188  			return nil, err
   189  		}
   190  		rbat.InitZsOne(len(ures))
   191  		return rbat, nil
   192  	}
   193  	jsonSlice := vector.MustBytesCols(jsonVec)
   194  	rows := 0
   195  	for i := range jsonSlice {
   196  		json, err = fn(jsonSlice[i])
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap)
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  		rbat, err = makeBatch(rbat, ures, param, arg, proc)
   205  		if err != nil {
   206  			return nil, err
   207  		}
   208  		rows += len(ures)
   209  	}
   210  	rbat.InitZsOne(rows)
   211  	return rbat, nil
   212  }
   213  
   214  func makeBatch(bat *batch.Batch, ures []bytejson.UnnestResult, param *unnestParam, arg *Argument, proc *process.Process) (*batch.Batch, error) {
   215  	for i := 0; i < len(ures); i++ {
   216  		for j := 0; j < len(arg.Attrs); j++ {
   217  			vec := bat.GetVector(int32(j))
   218  			var err error
   219  			switch arg.Attrs[j] {
   220  			case "col":
   221  				err = vec.Append([]byte(param.ColName), false, proc.Mp())
   222  			case "seq":
   223  				err = vec.Append(int32(i), false, proc.Mp())
   224  			case "index":
   225  				val, ok := ures[i][arg.Attrs[j]]
   226  				if !ok || val == nil {
   227  					err = vec.Append(int32(0), true, proc.Mp())
   228  				} else {
   229  					intVal, _ := strconv.ParseInt(string(val), 10, 32)
   230  					err = vec.Append(int32(intVal), false, proc.Mp())
   231  				}
   232  			case "key", "path", "value", "this":
   233  				val, ok := ures[i][arg.Attrs[j]]
   234  				err = vec.Append(val, !ok || val == nil, proc.Mp())
   235  			default:
   236  				err = moerr.NewInvalidArg(proc.Ctx, "unnest: invalid column name:%s", arg.Attrs[j])
   237  			}
   238  			if err != nil {
   239  				return nil, err
   240  			}
   241  		}
   242  	}
   243  	return bat, nil
   244  }
   245  
   246  func parseJson(dt []byte) (bytejson.ByteJson, error) {
   247  	ret := types.DecodeJson(dt)
   248  	return ret, nil
   249  }
   250  func parseStr(dt []byte) (bytejson.ByteJson, error) {
   251  	return types.ParseSliceToByteJson(dt)
   252  }