github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/table_function/unnest.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package table_function 16 17 import ( 18 "bytes" 19 "encoding/json" 20 "fmt" 21 "strconv" 22 23 "github.com/matrixorigin/matrixone/pkg/common/moerr" 24 "github.com/matrixorigin/matrixone/pkg/container/batch" 25 "github.com/matrixorigin/matrixone/pkg/container/bytejson" 26 "github.com/matrixorigin/matrixone/pkg/container/types" 27 "github.com/matrixorigin/matrixone/pkg/container/vector" 28 "github.com/matrixorigin/matrixone/pkg/pb/plan" 29 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 30 plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" 31 "github.com/matrixorigin/matrixone/pkg/vm/process" 32 ) 33 34 func genFilterMap(filters []string) map[string]struct{} { 35 if filters == nil { 36 return defaultFilterMap 37 } 38 filterMap := make(map[string]struct{}, len(filters)) 39 for _, f := range filters { 40 filterMap[f] = struct{}{} 41 } 42 return filterMap 43 } 44 45 func unnestString(arg any, buf *bytes.Buffer) { 46 buf.WriteString("unnest") 47 } 48 49 func unnestPrepare(proc *process.Process, arg *Argument) error { 50 param := unnestParam{} 51 param.ColName = string(arg.Params) 52 if len(param.ColName) == 0 { 53 param.ColName = "UNNEST_DEFAULT" 54 } 55 var filters []string 56 for i := range arg.Attrs { 57 denied := false 58 for j := range unnestDeniedFilters { 59 if arg.Attrs[i] == unnestDeniedFilters[j] { 60 denied = true 61 break 62 } 63 } 64 if !denied { 65 filters = append(filters, arg.Attrs[i]) 66 } 67 } 68 param.FilterMap = genFilterMap(filters) 69 if len(arg.Args) < 1 || len(arg.Args) > 3 { 70 return moerr.NewInvalidInput(proc.Ctx, "unnest: argument number must be 1, 2 or 3") 71 } 72 if len(arg.Args) == 1 { 73 vType := types.T_varchar.ToType() 74 bType := types.T_bool.ToType() 75 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&vType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Sval{Sval: "$"}}}}) 76 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Bval{Bval: false}}}}) 77 } else if len(arg.Args) == 2 { 78 bType := types.T_bool.ToType() 79 arg.Args = append(arg.Args, &plan.Expr{Typ: plan2.MakePlan2Type(&bType), Expr: &plan.Expr_C{C: &plan2.Const{Value: &plan.Const_Bval{Bval: false}}}}) 80 } 81 dt, err := json.Marshal(param) 82 if err != nil { 83 return err 84 } 85 arg.Params = dt 86 return nil 87 } 88 89 func unnestCall(_ int, proc *process.Process, arg *Argument) (bool, error) { 90 var ( 91 err error 92 rbat *batch.Batch 93 jsonVec *vector.Vector 94 pathVec *vector.Vector 95 outerVec *vector.Vector 96 path bytejson.Path 97 outer bool 98 ) 99 defer func() { 100 if err != nil && rbat != nil { 101 rbat.Clean(proc.Mp()) 102 } 103 if jsonVec != nil { 104 jsonVec.Free(proc.Mp()) 105 } 106 if pathVec != nil { 107 pathVec.Free(proc.Mp()) 108 } 109 if outerVec != nil { 110 outerVec.Free(proc.Mp()) 111 } 112 }() 113 bat := proc.InputBatch() 114 if bat == nil { 115 return true, nil 116 } 117 jsonVec, err = colexec.EvalExpr(bat, proc, arg.Args[0]) 118 if err != nil { 119 return false, err 120 } 121 if jsonVec.Typ.Oid != types.T_json && jsonVec.Typ.Oid != types.T_varchar { 122 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: first argument must be json or string, but got %s", jsonVec.Typ.String())) 123 } 124 pathVec, err = colexec.EvalExpr(bat, proc, arg.Args[1]) 125 if err != nil { 126 return false, err 127 } 128 if pathVec.Typ.Oid != types.T_varchar { 129 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: second argument must be string, but got %s", pathVec.Typ.String())) 130 } 131 outerVec, err = colexec.EvalExpr(bat, proc, arg.Args[2]) 132 if err != nil { 133 return false, err 134 } 135 if outerVec.Typ.Oid != types.T_bool { 136 return false, moerr.NewInvalidInput(proc.Ctx, fmt.Sprintf("unnest: third argument must be bool, but got %s", outerVec.Typ.String())) 137 } 138 if !pathVec.IsScalar() || !outerVec.IsScalar() { 139 return false, moerr.NewInvalidInput(proc.Ctx, "unnest: second and third arguments must be scalar") 140 } 141 path, err = types.ParseStringToPath(pathVec.GetString(0)) 142 if err != nil { 143 return false, err 144 } 145 outer = vector.MustTCols[bool](outerVec)[0] 146 param := unnestParam{} 147 if err = json.Unmarshal(arg.Params, ¶m); err != nil { 148 return false, err 149 } 150 switch jsonVec.Typ.Oid { 151 case types.T_json: 152 rbat, err = handle(jsonVec, &path, outer, ¶m, arg, proc, parseJson) 153 case types.T_varchar: 154 rbat, err = handle(jsonVec, &path, outer, ¶m, arg, proc, parseStr) 155 } 156 if err != nil { 157 return false, err 158 } 159 proc.SetInputBatch(rbat) 160 return false, nil 161 } 162 163 func handle(jsonVec *vector.Vector, path *bytejson.Path, outer bool, param *unnestParam, arg *Argument, proc *process.Process, fn func(dt []byte) (bytejson.ByteJson, error)) (*batch.Batch, error) { 164 var ( 165 err error 166 rbat *batch.Batch 167 json bytejson.ByteJson 168 ures []bytejson.UnnestResult 169 ) 170 171 rbat = batch.New(false, arg.Attrs) 172 rbat.Cnt = 1 173 for i := range arg.Rets { 174 rbat.Vecs[i] = vector.New(dupType(arg.Rets[i].Typ)) 175 } 176 177 if jsonVec.IsScalar() { 178 json, err = fn(jsonVec.GetBytes(0)) 179 if err != nil { 180 return nil, err 181 } 182 ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap) 183 if err != nil { 184 return nil, err 185 } 186 rbat, err = makeBatch(rbat, ures, param, arg, proc) 187 if err != nil { 188 return nil, err 189 } 190 rbat.InitZsOne(len(ures)) 191 return rbat, nil 192 } 193 jsonSlice := vector.MustBytesCols(jsonVec) 194 rows := 0 195 for i := range jsonSlice { 196 json, err = fn(jsonSlice[i]) 197 if err != nil { 198 return nil, err 199 } 200 ures, err = json.Unnest(path, outer, unnestRecursive, unnestMode, param.FilterMap) 201 if err != nil { 202 return nil, err 203 } 204 rbat, err = makeBatch(rbat, ures, param, arg, proc) 205 if err != nil { 206 return nil, err 207 } 208 rows += len(ures) 209 } 210 rbat.InitZsOne(rows) 211 return rbat, nil 212 } 213 214 func makeBatch(bat *batch.Batch, ures []bytejson.UnnestResult, param *unnestParam, arg *Argument, proc *process.Process) (*batch.Batch, error) { 215 for i := 0; i < len(ures); i++ { 216 for j := 0; j < len(arg.Attrs); j++ { 217 vec := bat.GetVector(int32(j)) 218 var err error 219 switch arg.Attrs[j] { 220 case "col": 221 err = vec.Append([]byte(param.ColName), false, proc.Mp()) 222 case "seq": 223 err = vec.Append(int32(i), false, proc.Mp()) 224 case "index": 225 val, ok := ures[i][arg.Attrs[j]] 226 if !ok || val == nil { 227 err = vec.Append(int32(0), true, proc.Mp()) 228 } else { 229 intVal, _ := strconv.ParseInt(string(val), 10, 32) 230 err = vec.Append(int32(intVal), false, proc.Mp()) 231 } 232 case "key", "path", "value", "this": 233 val, ok := ures[i][arg.Attrs[j]] 234 err = vec.Append(val, !ok || val == nil, proc.Mp()) 235 default: 236 err = moerr.NewInvalidArg(proc.Ctx, "unnest: invalid column name:%s", arg.Attrs[j]) 237 } 238 if err != nil { 239 return nil, err 240 } 241 } 242 } 243 return bat, nil 244 } 245 246 func parseJson(dt []byte) (bytejson.ByteJson, error) { 247 ret := types.DecodeJson(dt) 248 return ret, nil 249 } 250 func parseStr(dt []byte) (bytejson.ByteJson, error) { 251 return types.ParseSliceToByteJson(dt) 252 }