github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/table_function/unnest.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package table_function 16 17 import ( 18 "encoding/json" 19 "fmt" 20 "strconv" 21 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/container/batch" 24 "github.com/matrixorigin/matrixone/pkg/container/bytejson" 25 "github.com/matrixorigin/matrixone/pkg/container/types" 26 "github.com/matrixorigin/matrixone/pkg/container/vector" 27 "github.com/matrixorigin/matrixone/pkg/pb/plan" 28 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 29 plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" 30 "github.com/matrixorigin/matrixone/pkg/vm" 31 "github.com/matrixorigin/matrixone/pkg/vm/process" 32 ) 33 34 func genFilterMap(filters []string) map[string]struct{} { 35 if filters == nil { 36 return defaultFilterMap 37 } 38 filterMap := make(map[string]struct{}, len(filters)) 39 for _, f := range filters { 40 filterMap[f] = struct{}{} 41 } 42 return filterMap 43 } 44 45 // func unnestString(buf *bytes.Buffer) { 46 // buf.WriteString("unnest") 47 // } 48 49 func unnestPrepare(proc *process.Process, arg *Argument) error { 50 param := unnestParam{} 51 param.ColName = string(arg.Params) 52 if len(param.ColName) == 0 { 53 param.ColName = "UNNEST_DEFAULT" 54 } 55 var filters []string 56 for i := range arg.Attrs { 57 denied := false 58 for j := range unnestDeniedFilters { 59 if arg.Attrs[i] == unnestDeniedFilters[j] { 60 denied = true 61 break 62 } 63 } 64 if !denied { 65 filters = append(filters, arg.Attrs[i]) 66 } 67 } 68 param.FilterMap = genFilterMap(filters) 69 if len(arg.Args) < 1 || len(arg.Args) > 3 { 70 return moerr.NewInvalidInput(proc.Ctx, "unnest: argument number must be 1, 2 or 3") 71 } 72 if len(arg.Args) == 1 { 73 vType := types.T_varchar.ToType() 74 bType := types.T_bool.ToType() 75 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&vType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Sval{Sval: "$"}}}}) 76 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Bval{Bval: false}}}}) 77 } else if len(arg.Args) == 2 { 78 bType := types.T_bool.ToType() 79 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_Lit{Lit: &plan2.Const{Value: &plan.Literal_Bval{Bval: false}}}}) 80 } 81 dt, err := json.Marshal(param) 82 if err != nil { 83 return err 84 } 85 arg.Params = dt 86 87 arg.ctr = new(container) 88 arg.ctr.executorsForArgs, err = colexec.NewExpressionExecutorsFromPlanExpressions(proc, arg.Args) 89 return err 90 } 91 92 func unnestCall(_ int, proc *process.Process, arg *Argument, result *vm.CallResult) (bool, error) { 93 var ( 94 err error 95 rbat *batch.Batch 96 jsonVec *vector.Vector 97 pathVec *vector.Vector 98 outerVec *vector.Vector 99 path bytejson.Path 100 outer bool 101 ) 102 bat := result.Batch 103 defer func() { 104 if err != nil && rbat != nil { 105 rbat.Clean(proc.Mp()) 106 } 107 }() 108 if bat == nil { 109 return true, nil 110 } 111 if bat.IsEmpty() { 112 proc.PutBatch(bat) 113 result.Batch = batch.EmptyBatch 114 return false, nil 115 } 116 jsonVec, err = arg.ctr.executorsForArgs[0].Eval(proc, []*batch.Batch{bat}) 117 if err != nil { 118 return false, err 119 } 120 if jsonVec.GetType().Oid != types.T_json && jsonVec.GetType().Oid != types.T_varchar { 121 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: first argument must be json or string, but got %s", jsonVec.GetType().String())) 122 } 123 pathVec, err = arg.ctr.executorsForArgs[1].Eval(proc, []*batch.Batch{bat}) 124 if err != nil { 125 return false, err 126 } 127 if pathVec.GetType().Oid != types.T_varchar { 128 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: second argument must be string, but got %s", pathVec.GetType().String())) 129 } 130 outerVec, err = arg.ctr.executorsForArgs[2].Eval(proc, []*batch.Batch{bat}) 131 if err != nil { 132 return false, err 133 } 134 if outerVec.GetType().Oid != types.T_bool { 135 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: third argument must be bool, but got %s", outerVec.GetType().String())) 136 } 137 if !pathVec.IsConst() || !outerVec.IsConst() { 138 return false, moerr.NewInvalidInput(proc.Ctx, "unnest: second and third arguments must be scalar") 139 } 140 path, err = types.ParseStringToPath(pathVec.GetStringAt(0)) 141 if err != nil { 142 return false, err 143 } 144 outer = vector.MustFixedCol[bool](outerVec)[0] 145 param := unnestParam{} 146 if err = json.Unmarshal(arg.Params, ¶m); err != nil { 147 return false, err 148 } 149 switch jsonVec.GetType().Oid { 150 case types.T_json: 151 rbat, err = handle(jsonVec, &path, outer, ¶m, arg, proc, parseJson) 152 case types.T_varchar: 153 rbat, err = handle(jsonVec, &path, outer, ¶m, arg, proc, parseStr) 154 } 155 if err != nil { 156 return false, err 157 } 158 result.Batch = rbat 159 return false, nil 160 } 161 162 func handle(jsonVec *vector.Vector, path *bytejson.Path, outer bool, param *unnestParam, arg *Argument, proc *process.Process, fn func(dt []byte) (bytejson.ByteJson, error)) (*batch.Batch, error) { 163 var ( 164 err error 165 rbat *batch.Batch 166 json bytejson.ByteJson 167 ures []bytejson.UnnestResult 168 ) 169 170 rbat = batch.NewWithSize(len(arg.Attrs)) 171 rbat.Attrs = arg.Attrs 172 rbat.Cnt = 1 173 for i := range arg.retSchema { 174 rbat.Vecs[i] = proc.GetVector(arg.retSchema[i]) 175 } 176 177 if jsonVec.IsConst() { 178 json, err = fn(jsonVec.GetBytesAt(0)) 179 if err != nil { 180 return nil, err 181 } 182 ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap) 183 if err != nil { 184 return nil, err 185 } 186 rbat, err = makeBatch(rbat, ures, param, arg, proc) 187 if err != nil { 188 return nil, err 189 } 190 rbat.SetRowCount(len(ures)) 191 return rbat, nil 192 } 193 jsonSlice := vector.ExpandBytesCol(jsonVec) 194 rows := 0 195 for i := range jsonSlice { 196 json, err = fn(jsonSlice[i]) 197 if err != nil { 198 return nil, err 199 } 200 ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap) 201 if err != nil { 202 return nil, err 203 } 204 rbat, err = makeBatch(rbat, ures, param, arg, proc) 205 if err != nil { 206 return nil, err 207 } 208 rows += len(ures) 209 } 210 rbat.SetRowCount(rows) 211 return rbat, nil 212 } 213 214 func makeBatch(bat *batch.Batch, ures []bytejson.UnnestResult, param *unnestParam, arg *Argument, proc *process.Process) (*batch.Batch, error) { 215 for i := 0; i < len(ures); i++ { 216 for j := 0; j < len(arg.Attrs); j++ { 217 vec := bat.GetVector(int32(j)) 218 var err error 219 switch arg.Attrs[j] { 220 case "col": 221 err = vector.AppendBytes(vec, []byte(param.ColName), false, proc.Mp()) 222 case "seq": 223 err = vector.AppendFixed(vec, int32(i), false, proc.Mp()) 224 case "index": 225 val, ok := ures[i][arg.Attrs[j]] 226 if !ok || val == nil { 227 err = vector.AppendFixed(vec, int32(0), true, proc.Mp()) 228 } else { 229 intVal, _ := strconv.ParseInt(string(val), 10, 32) 230 err = vector.AppendFixed(vec, int32(intVal), false, proc.Mp()) 231 } 232 case "key", "path", "value", "this": 233 val, ok := ures[i][arg.Attrs[j]] 234 err = vector.AppendBytes(vec, val, !ok || val == nil, proc.Mp()) 235 default: 236 err = moerr.NewInvalidArg(proc.Ctx, "unnest: invalid column name:%s", arg.Attrs[j]) 237 } 238 if err != nil { 239 return nil, err 240 } 241 } 242 } 243 return bat, nil 244 } 245 246 func parseJson(dt []byte) (bytejson.ByteJson, error) { 247 ret := types.DecodeJson(dt) 248 return ret, nil 249 } 250 func parseStr(dt []byte) (bytejson.ByteJson, error) { 251 return types.ParseSliceToByteJson(dt) 252 }