github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/table_function/unnest.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package table_function
    16  
    17  import (
    18  	"encoding/json"
    19  	"fmt"
    20  	"strconv"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    24  	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
    25  	"github.com/matrixorigin/matrixone/pkg/container/types"
    26  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    28  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    29  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    30  	"github.com/matrixorigin/matrixone/pkg/vm"
    31  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    32  )
    33  
    34  func genFilterMap(filters []string) map[string]struct{} {
    35  	if filters == nil {
    36  		return defaultFilterMap
    37  	}
    38  	filterMap := make(map[string]struct{}, len(filters))
    39  	for _, f := range filters {
    40  		filterMap[f] = struct{}{}
    41  	}
    42  	return filterMap
    43  }
    44  
    45  // func unnestString(buf *bytes.Buffer) {
    46  // 	buf.WriteString("unnest")
    47  // }
    48  
    49  func unnestPrepare(proc *process.Process, arg *Argument) error {
    50  	param := unnestParam{}
    51  	param.ColName = string(arg.Params)
    52  	if len(param.ColName) == 0 {
    53  		param.ColName = "UNNEST_DEFAULT"
    54  	}
    55  	var filters []string
    56  	for i := range arg.Attrs {
    57  		denied := false
    58  		for j := range unnestDeniedFilters {
    59  			if arg.Attrs[i] == unnestDeniedFilters[j] {
    60  				denied = true
    61  				break
    62  			}
    63  		}
    64  		if !denied {
    65  			filters = append(filters, arg.Attrs[i])
    66  		}
    67  	}
    68  	param.FilterMap = genFilterMap(filters)
    69  	if len(arg.Args) < 1 || len(arg.Args) > 3 {
    70  		return moerr.NewInvalidInput(proc.Ctx, "unnest: argument number must be 1, 2 or 3")
    71  	}
    72  	if len(arg.Args) == 1 {
    73  		vType := types.T_varchar.ToType()
    74  		bType := types.T_bool.ToType()
    75  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&vType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Sval{Sval: "$"}}}})
    76  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Bval{Bval: false}}}})
    77  	} else if len(arg.Args) == 2 {
    78  		bType := types.T_bool.ToType()
    79  		arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Bval{Bval: false}}}})
    80  	}
    81  	dt, err := json.Marshal(param)
    82  	if err != nil {
    83  		return err
    84  	}
    85  	arg.Params = dt
    86  
    87  	arg.ctr = new(container)
    88  	arg.ctr.executorsForArgs, err = colexec.NewExpressionExecutorsFromPlanExpressions(proc, arg.Args)
    89  	return err
    90  }
    91  
    92  func unnestCall(_ int, proc *process.Process, arg *Argument, result *vm.CallResult) (bool, error) {
    93  	var (
    94  		err      error
    95  		rbat     *batch.Batch
    96  		jsonVec  *vector.Vector
    97  		pathVec  *vector.Vector
    98  		outerVec *vector.Vector
    99  		path     bytejson.Path
   100  		outer    bool
   101  	)
   102  	bat := result.Batch
   103  	defer func() {
   104  		if err != nil && rbat != nil {
   105  			rbat.Clean(proc.Mp())
   106  		}
   107  	}()
   108  	if bat == nil {
   109  		return true, nil
   110  	}
   111  	if bat.IsEmpty() {
   112  		proc.PutBatch(bat)
   113  		result.Batch = batch.EmptyBatch
   114  		return false, nil
   115  	}
   116  	jsonVec, err = arg.ctr.executorsForArgs[0].Eval(proc, []*batch.Batch{bat})
   117  	if err != nil {
   118  		return false, err
   119  	}
   120  	if jsonVec.GetType().Oid != types.T_json && jsonVec.GetType().Oid != types.T_varchar {
   121  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: first argument must be json or string, but got %s", jsonVec.GetType().String()))
   122  	}
   123  	pathVec, err = arg.ctr.executorsForArgs[1].Eval(proc, []*batch.Batch{bat})
   124  	if err != nil {
   125  		return false, err
   126  	}
   127  	if pathVec.GetType().Oid != types.T_varchar {
   128  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: second argument must be string, but got %s", pathVec.GetType().String()))
   129  	}
   130  	outerVec, err = arg.ctr.executorsForArgs[2].Eval(proc, []*batch.Batch{bat})
   131  	if err != nil {
   132  		return false, err
   133  	}
   134  	if outerVec.GetType().Oid != types.T_bool {
   135  		return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: third argument must be bool, but got %s", outerVec.GetType().String()))
   136  	}
   137  	if !pathVec.IsConst() || !outerVec.IsConst() {
   138  		return false, moerr.NewInvalidInput(proc.Ctx, "unnest: second and third arguments must be scalar")
   139  	}
   140  	path, err = types.ParseStringToPath(pathVec.GetStringAt(0))
   141  	if err != nil {
   142  		return false, err
   143  	}
   144  	outer = vector.MustFixedCol[bool](outerVec)[0]
   145  	param := unnestParam{}
   146  	if err = json.Unmarshal(arg.Params, &param); err != nil {
   147  		return false, err
   148  	}
   149  	switch jsonVec.GetType().Oid {
   150  	case types.T_json:
   151  		rbat, err = handle(jsonVec, &path, outer, &param, arg, proc, parseJson)
   152  	case types.T_varchar:
   153  		rbat, err = handle(jsonVec, &path, outer, &param, arg, proc, parseStr)
   154  	}
   155  	if err != nil {
   156  		return false, err
   157  	}
   158  	result.Batch = rbat
   159  	return false, nil
   160  }
   161  
   162  func handle(jsonVec *vector.Vector, path *bytejson.Path, outer bool, param *unnestParam, arg *Argument, proc *process.Process, fn func(dt []byte) (bytejson.ByteJson, error)) (*batch.Batch, error) {
   163  	var (
   164  		err  error
   165  		rbat *batch.Batch
   166  		json bytejson.ByteJson
   167  		ures []bytejson.UnnestResult
   168  	)
   169  
   170  	rbat = batch.NewWithSize(len(arg.Attrs))
   171  	rbat.Attrs = arg.Attrs
   172  	rbat.Cnt = 1
   173  	for i := range arg.retSchema {
   174  		rbat.Vecs[i] = proc.GetVector(arg.retSchema[i])
   175  	}
   176  
   177  	if jsonVec.IsConst() {
   178  		json, err = fn(jsonVec.GetBytesAt(0))
   179  		if err != nil {
   180  			return nil, err
   181  		}
   182  		ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap)
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		rbat, err = makeBatch(rbat, ures, param, arg, proc)
   187  		if err != nil {
   188  			return nil, err
   189  		}
   190  		rbat.SetRowCount(len(ures))
   191  		return rbat, nil
   192  	}
   193  	jsonSlice := vector.ExpandBytesCol(jsonVec)
   194  	rows := 0
   195  	for i := range jsonSlice {
   196  		json, err = fn(jsonSlice[i])
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap)
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  		rbat, err = makeBatch(rbat, ures, param, arg, proc)
   205  		if err != nil {
   206  			return nil, err
   207  		}
   208  		rows += len(ures)
   209  	}
   210  	rbat.SetRowCount(rows)
   211  	return rbat, nil
   212  }
   213  
   214  func makeBatch(bat *batch.Batch, ures []bytejson.UnnestResult, param *unnestParam, arg *Argument, proc *process.Process) (*batch.Batch, error) {
   215  	for i := 0; i < len(ures); i++ {
   216  		for j := 0; j < len(arg.Attrs); j++ {
   217  			vec := bat.GetVector(int32(j))
   218  			var err error
   219  			switch arg.Attrs[j] {
   220  			case "col":
   221  				err = vector.AppendBytes(vec, []byte(param.ColName), false, proc.Mp())
   222  			case "seq":
   223  				err = vector.AppendFixed(vec, int32(i), false, proc.Mp())
   224  			case "index":
   225  				val, ok := ures[i][arg.Attrs[j]]
   226  				if !ok || val == nil {
   227  					err = vector.AppendFixed(vec, int32(0), true, proc.Mp())
   228  				} else {
   229  					intVal, _ := strconv.ParseInt(string(val), 10, 32)
   230  					err = vector.AppendFixed(vec, int32(intVal), false, proc.Mp())
   231  				}
   232  			case "key", "path", "value", "this":
   233  				val, ok := ures[i][arg.Attrs[j]]
   234  				err = vector.AppendBytes(vec, val, !ok || val == nil, proc.Mp())
   235  			default:
   236  				err = moerr.NewInvalidArg(proc.Ctx, "unnest: invalid column name:%s", arg.Attrs[j])
   237  			}
   238  			if err != nil {
   239  				return nil, err
   240  			}
   241  		}
   242  	}
   243  	return bat, nil
   244  }
   245  
   246  func parseJson(dt []byte) (bytejson.ByteJson, error) {
   247  	ret := types.DecodeJson(dt)
   248  	return ret, nil
   249  }
   250  func parseStr(dt []byte) (bytejson.ByteJson, error) {
   251  	return types.ParseSliceToByteJson(dt)
   252  }