github.com/matrixorigin/matrixone@v0.7.0/pkg/container/bytejson/bytejson.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bytejson 16 17 import ( 18 "bytes" 19 "encoding/json" 20 "fmt" 21 "math" 22 "sort" 23 "strconv" 24 "strings" 25 26 "github.com/matrixorigin/matrixone/pkg/common/moerr" 27 ) 28 29 func (bj ByteJson) String() string { 30 ret, _ := bj.MarshalJSON() 31 return string(ret) 32 } 33 34 func (bj ByteJson) Unquote() (string, error) { 35 if bj.Type != TpCodeString { 36 return bj.String(), nil 37 } 38 str := bj.GetString() 39 if len(str) < 2 || (str[0] != '"' || str[len(str)-1] != '"') { 40 return string(str), nil 41 } 42 str = str[1 : len(str)-1] 43 var sb strings.Builder 44 for i := 0; i < len(str); i++ { 45 if str[i] != '\\' { 46 sb.WriteByte(str[i]) 47 continue 48 } 49 i++ 50 if trans, ok := escapedChars[str[i]]; ok { 51 sb.WriteByte(trans) 52 continue 53 } 54 if str[i] == 'u' { // transform unicode to utf8 55 if i+4 > len(str) { 56 return "", moerr.NewInvalidInputNoCtx("invalid unicode") 57 } 58 unicodeStr := string(str[i-1 : i+5]) 59 content := strings.Replace(strconv.Quote(unicodeStr), `\\u`, `\u`, -1) 60 text, err := strconv.Unquote(content) 61 if err != nil { 62 return "", moerr.NewInvalidInputNoCtx("invalid unicode") 63 } 64 sb.WriteString(text) 65 i += 4 66 continue 67 } 68 sb.WriteByte(str[i]) 69 } 70 return sb.String(), nil 71 } 72 73 // MarshalJSON transform bytejson to []byte,for visible 74 func (bj ByteJson) MarshalJSON() ([]byte, error) { 75 ret := make([]byte, 0, len(bj.Data)*3/2) 76 return bj.to(ret) 77 } 78 79 // Marshal transform bytejson to []byte,for storage 80 func (bj ByteJson) Marshal() ([]byte, error) { 81 buf := make([]byte, len(bj.Data)+1) 82 buf[0] = byte(bj.Type) 83 copy(buf[1:], bj.Data) 84 return buf, nil 85 } 86 87 // Unmarshal transform storage []byte to bytejson 88 func (bj *ByteJson) Unmarshal(buf []byte) error { 89 //TODO add validate checker 90 bj.Type = TpCode(buf[0]) 91 bj.Data = buf[1:] 92 return nil 93 } 94 95 // UnmarshalJSON transform visible []byte to bytejson 96 func (bj *ByteJson) UnmarshalJSON(data []byte) error { 97 var decoder = json.NewDecoder(bytes.NewReader(data)) 98 decoder.UseNumber() 99 var in interface{} 100 err := decoder.Decode(&in) 101 if err != nil { 102 return nil 103 } 104 buf := make([]byte, 0, len(data)) 105 if tpCode, buf, err := addElem(buf, in); err != nil { 106 return err 107 } else { 108 bj.Data = buf 109 bj.Type = tpCode 110 } 111 return nil 112 } 113 114 func (bj *ByteJson) UnmarshalObject(obj interface{}) (err error) { 115 buf := make([]byte, 0, 64) 116 var tpCode TpCode 117 if tpCode, buf, err = addElem(buf, obj); err != nil { 118 return 119 } 120 bj.Type = tpCode 121 bj.Data = buf 122 return 123 } 124 125 func (bj ByteJson) IsNull() bool { 126 return bj.Type == TpCodeLiteral && bj.Data[0] == LiteralNull 127 } 128 129 func (bj ByteJson) GetElemCnt() int { 130 return int(endian.Uint32(bj.Data)) 131 } 132 133 func (bj ByteJson) GetInt64() int64 { 134 return int64(bj.GetUint64()) 135 } 136 func (bj ByteJson) GetUint64() uint64 { 137 return endian.Uint64(bj.Data) 138 } 139 140 func (bj ByteJson) GetFloat64() float64 { 141 return math.Float64frombits(bj.GetUint64()) 142 } 143 144 func (bj ByteJson) GetString() []byte { 145 num, length := calStrLen(bj.Data) 146 return bj.Data[length : length+num] 147 } 148 149 func (bj ByteJson) to(buf []byte) ([]byte, error) { 150 var err error 151 switch bj.Type { 152 case TpCodeArray: 153 buf, err = bj.toArray(buf) 154 case TpCodeObject: 155 buf, err = bj.toObject(buf) 156 case TpCodeInt64: 157 buf = bj.toInt64(buf) 158 case TpCodeUint64: 159 buf = bj.toUint64(buf) 160 case TpCodeLiteral: 161 buf = bj.toLiteral(buf) 162 case TpCodeFloat64: 163 buf, err = bj.toFloat64(buf) 164 case TpCodeString: 165 buf = bj.toString(buf) 166 default: 167 err = moerr.NewInvalidInputNoCtx("invalid json type '%v'", bj.Type) 168 } 169 return buf, err 170 } 171 172 func (bj ByteJson) toArray(buf []byte) ([]byte, error) { 173 cnt := bj.GetElemCnt() 174 buf = append(buf, '[') 175 var err error 176 for i := 0; i < cnt; i++ { 177 if i != 0 { 178 buf = append(buf, ", "...) 179 } 180 buf, err = bj.getArrayElem(i).to(buf) 181 if err != nil { 182 return nil, err 183 } 184 } 185 return append(buf, ']'), nil 186 } 187 188 func (bj ByteJson) toObject(buf []byte) ([]byte, error) { 189 cnt := bj.GetElemCnt() 190 buf = append(buf, '{') 191 for i := 0; i < cnt; i++ { 192 if i != 0 { 193 buf = append(buf, ", "...) 194 } 195 var err error 196 buf = toString(buf, bj.getObjectKey(i)) 197 buf = append(buf, ": "...) 198 buf, err = bj.getObjectVal(i).to(buf) 199 if err != nil { 200 return nil, err 201 } 202 } 203 return append(buf, '}'), nil 204 } 205 206 func (bj ByteJson) toInt64(buf []byte) []byte { 207 return strconv.AppendInt(buf, bj.GetInt64(), 10) 208 } 209 func (bj ByteJson) toUint64(buf []byte) []byte { 210 return strconv.AppendUint(buf, bj.GetUint64(), 10) 211 } 212 213 func (bj ByteJson) toLiteral(buf []byte) []byte { 214 litTp := bj.Data[0] 215 switch litTp { 216 case LiteralNull: 217 buf = append(buf, "null"...) 218 case LiteralTrue: 219 buf = append(buf, "true"...) 220 case LiteralFalse: 221 buf = append(buf, "false"...) 222 default: 223 panic(fmt.Sprintf("invalid literal type:%d", litTp)) 224 } 225 return buf 226 } 227 228 func (bj ByteJson) toFloat64(buf []byte) ([]byte, error) { 229 f := bj.GetFloat64() 230 err := checkFloat64(f) 231 if err != nil { 232 return nil, err 233 } 234 // https://github.com/golang/go/issues/14135 235 var format byte 236 abs := math.Abs(f) 237 if abs == 0 || 1e-6 <= abs && abs < 1e21 { 238 format = 'f' 239 } else { 240 format = 'e' 241 } 242 buf = strconv.AppendFloat(buf, f, format, -1, 64) 243 return buf, nil 244 } 245 246 // transform byte string to visible string 247 func (bj ByteJson) toString(buf []byte) []byte { 248 data := bj.GetString() 249 return toString(buf, data) 250 } 251 252 func (bj ByteJson) getObjectKey(i int) []byte { 253 keyOff := int(endian.Uint32(bj.Data[headerSize+i*keyEntrySize:])) 254 keyLen := int(endian.Uint16(bj.Data[headerSize+i*keyEntrySize+keyOriginOff:])) 255 return bj.Data[keyOff : keyOff+keyLen] 256 } 257 258 func (bj ByteJson) getArrayElem(i int) ByteJson { 259 return bj.getValEntry(headerSize + i*valEntrySize) 260 } 261 262 func (bj ByteJson) getObjectVal(i int) ByteJson { 263 cnt := bj.GetElemCnt() 264 return bj.getValEntry(headerSize + cnt*keyEntrySize + i*valEntrySize) 265 } 266 267 func (bj ByteJson) getValEntry(off int) ByteJson { 268 tpCode := bj.Data[off] 269 valOff := endian.Uint32(bj.Data[off+valTypeSize:]) 270 switch TpCode(tpCode) { 271 case TpCodeLiteral: 272 return ByteJson{Type: TpCodeLiteral, Data: bj.Data[off+valTypeSize : off+valTypeSize+1]} 273 case TpCodeUint64, TpCodeInt64, TpCodeFloat64: 274 return ByteJson{Type: TpCode(tpCode), Data: bj.Data[valOff : valOff+numberSize]} 275 case TpCodeString: 276 num, length := calStrLen(bj.Data[valOff:]) 277 totalLen := uint32(num) + uint32(length) 278 return ByteJson{Type: TpCode(tpCode), Data: bj.Data[valOff : valOff+totalLen]} 279 } 280 dataBytes := endian.Uint32(bj.Data[valOff+docSizeOff:]) 281 return ByteJson{Type: TpCode(tpCode), Data: bj.Data[valOff : valOff+dataBytes]} 282 } 283 284 func (bj ByteJson) queryValByKey(key []byte) ByteJson { 285 cnt := bj.GetElemCnt() 286 idx := sort.Search(cnt, func(i int) bool { 287 return bytes.Compare(bj.getObjectKey(i), key) >= 0 288 }) 289 if idx >= cnt || !bytes.Equal(bj.getObjectKey(idx), key) { 290 dt := make([]byte, 1) 291 dt[0] = LiteralNull 292 return ByteJson{ 293 Type: TpCodeLiteral, 294 Data: dt, 295 } 296 } 297 return bj.getObjectVal(idx) 298 } 299 300 func (bj ByteJson) query(cur []ByteJson, path *Path) []ByteJson { 301 if path.empty() { 302 cur = append(cur, bj) 303 return cur 304 } 305 sub, nPath := path.step() 306 307 if sub.tp == subPathDoubleStar { 308 cur = bj.query(cur, &nPath) 309 if bj.Type == TpCodeObject { 310 cnt := bj.GetElemCnt() 311 for i := 0; i < cnt; i++ { 312 cur = bj.getObjectVal(i).query(cur, path) // take care here, the argument is path,not nPath 313 } 314 } else if bj.Type == TpCodeArray { 315 cnt := bj.GetElemCnt() 316 for i := 0; i < cnt; i++ { 317 cur = bj.getArrayElem(i).query(cur, path) // take care here, the argument is path,not nPath 318 } 319 } 320 return cur 321 } 322 323 if bj.Type == TpCodeObject { 324 switch sub.tp { 325 case subPathIdx: 326 start, _, _ := sub.idx.genIndex(1) 327 if start == 0 { 328 cur = bj.query(cur, &nPath) 329 } 330 case subPathRange: 331 se := sub.iRange.genRange(bj.GetElemCnt()) 332 if se[0] == 0 { 333 cur = bj.query(cur, &nPath) 334 } 335 case subPathKey: 336 cnt := bj.GetElemCnt() 337 if sub.key == "*" { 338 for i := 0; i < cnt; i++ { 339 cur = bj.getObjectVal(i).query(cur, &nPath) 340 } 341 } else { 342 tmp := bj.queryValByKey(string2Slice(sub.key)) 343 cur = tmp.query(cur, &nPath) 344 } 345 } 346 return cur 347 } 348 349 if bj.Type == TpCodeArray { 350 cnt := bj.GetElemCnt() 351 switch sub.tp { 352 case subPathIdx: 353 idx, _, last := sub.idx.genIndex(cnt) 354 if last && idx < 0 { 355 tmp := ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 356 cur = append(cur, tmp) 357 return cur 358 } 359 if idx == subPathIdxALL { 360 for i := 0; i < cnt; i++ { 361 cur = bj.getArrayElem(i).query(cur, &nPath) 362 } 363 } else { 364 cur = bj.getArrayElem(idx).query(cur, &nPath) 365 } 366 case subPathRange: 367 se := sub.iRange.genRange(cnt) 368 if se[0] == subPathIdxErr { 369 tmp := ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 370 cur = append(cur, tmp) 371 return cur 372 } 373 for i := se[0]; i <= se[1]; i++ { 374 cur = bj.getArrayElem(i).query(cur, &nPath) 375 } 376 } 377 } 378 return cur 379 } 380 func (bj ByteJson) Query(paths []*Path) *ByteJson { 381 out := make([]ByteJson, 0, len(paths)) 382 for _, path := range paths { 383 tmp := bj.query(nil, path) 384 if len(tmp) > 0 { 385 allNull := checkAllNull(tmp) 386 if !allNull { 387 out = append(out, tmp...) 388 } 389 } 390 } 391 if len(out) == 0 { 392 return &ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 393 } 394 if len(out) == 1 && len(paths) == 1 { 395 return &out[0] 396 } 397 allNull := checkAllNull(out) 398 if allNull { 399 return &ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 400 } 401 return mergeToArray(out) 402 } 403 404 func (bj ByteJson) canUnnest() bool { 405 return bj.Type == TpCodeArray || bj.Type == TpCodeObject 406 } 407 408 func (bj ByteJson) queryWithSubPath(keys []string, vals []ByteJson, path *Path, pathStr string) ([]string, []ByteJson) { 409 if path.empty() { 410 keys = append(keys, pathStr) 411 vals = append(vals, bj) 412 return keys, vals 413 } 414 sub, nPath := path.step() 415 if sub.tp == subPathDoubleStar { 416 keys, vals = bj.queryWithSubPath(keys, vals, &nPath, pathStr) 417 if bj.Type == TpCodeObject { 418 cnt := bj.GetElemCnt() 419 for i := 0; i < cnt; i++ { 420 newPathStr := fmt.Sprintf("%s.%s", pathStr, bj.getObjectKey(i)) 421 keys, vals = bj.getObjectVal(i).queryWithSubPath(keys, vals, path, newPathStr) // take care here, the argument is path,not nPath 422 } 423 } else if bj.Type == TpCodeArray { 424 cnt := bj.GetElemCnt() 425 for i := 0; i < cnt; i++ { 426 newPathStr := fmt.Sprintf("%s[%d]", pathStr, i) 427 keys, vals = bj.getArrayElem(i).queryWithSubPath(keys, vals, path, newPathStr) // take care here, the argument is path,not nPath 428 } 429 } 430 return keys, vals 431 } 432 if bj.Type == TpCodeObject { 433 cnt := bj.GetElemCnt() 434 switch sub.tp { 435 case subPathIdx: 436 start, _, _ := sub.idx.genIndex(1) 437 if start == 0 { 438 newPathStr := fmt.Sprintf("%s[%d]", pathStr, start) 439 keys, vals = bj.queryWithSubPath(keys, vals, &nPath, newPathStr) 440 } 441 case subPathRange: 442 se := sub.iRange.genRange(cnt) 443 if se[0] == 0 { 444 newPathStr := fmt.Sprintf("%s[%d]", pathStr, se[0]) 445 keys, vals = bj.queryWithSubPath(keys, vals, &nPath, newPathStr) 446 } 447 case subPathKey: 448 if sub.key == "*" { 449 for i := 0; i < cnt; i++ { 450 newPathStr := fmt.Sprintf("%s.%s", pathStr, bj.getObjectKey(i)) 451 keys, vals = bj.getObjectVal(i).queryWithSubPath(keys, vals, &nPath, newPathStr) 452 } 453 } else { 454 tmp := bj.queryValByKey(string2Slice(sub.key)) 455 newPathStr := fmt.Sprintf("%s.%s", pathStr, sub.key) 456 keys, vals = tmp.queryWithSubPath(keys, vals, &nPath, newPathStr) 457 } 458 } 459 } 460 if bj.Type == TpCodeArray { 461 cnt := bj.GetElemCnt() 462 switch sub.tp { 463 case subPathIdx: 464 idx, _, last := sub.idx.genIndex(cnt) 465 if last && idx < 0 { 466 tmp := ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 467 newPathStr := fmt.Sprintf("%s[%d]", pathStr, sub.idx.num) 468 keys = append(keys, newPathStr) 469 vals = append(vals, tmp) 470 return keys, vals 471 } 472 if idx == subPathIdxALL { 473 for i := 0; i < cnt; i++ { 474 newPathStr := fmt.Sprintf("%s[%d]", pathStr, i) 475 keys, vals = bj.getArrayElem(i).queryWithSubPath(keys, vals, &nPath, newPathStr) 476 } 477 } else { 478 newPathStr := fmt.Sprintf("%s[%d]", pathStr, idx) 479 keys, vals = bj.getArrayElem(idx).queryWithSubPath(keys, vals, &nPath, newPathStr) 480 } 481 case subPathRange: 482 se := sub.iRange.genRange(cnt) 483 if se[0] == subPathIdxErr { 484 tmp := ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} 485 newPathStr := fmt.Sprintf("%s[%d to %d]", pathStr, sub.iRange.start.num, sub.iRange.end.num) 486 keys = append(keys, newPathStr) 487 vals = append(vals, tmp) 488 return keys, vals 489 } 490 for i := se[0]; i <= se[1]; i++ { 491 newPathStr := fmt.Sprintf("%s[%d]", pathStr, i) 492 keys, vals = bj.getArrayElem(i).queryWithSubPath(keys, vals, &nPath, newPathStr) 493 } 494 } 495 } 496 return keys, vals 497 } 498 499 func (bj ByteJson) unnestWithParams(out []UnnestResult, outer, recursive bool, mode string, pathStr string, this *ByteJson, filterMap map[string]struct{}) []UnnestResult { 500 if !bj.canUnnest() { 501 index, key := genIndexOrKey(pathStr) 502 tmp := UnnestResult{} 503 genUnnestResult(tmp, index, key, string2Slice(pathStr), &bj, this, filterMap) 504 out = append(out, tmp) 505 return out 506 } 507 if bj.Type == TpCodeObject && mode != "array" { 508 cnt := bj.GetElemCnt() 509 for i := 0; i < cnt; i++ { 510 key := bj.getObjectKey(i) 511 val := bj.getObjectVal(i) 512 newPathStr := fmt.Sprintf("%s.%s", pathStr, key) 513 tmp := UnnestResult{} 514 genUnnestResult(tmp, nil, key, string2Slice(newPathStr), &val, this, filterMap) 515 out = append(out, tmp) 516 if val.canUnnest() && recursive { 517 out = val.unnestWithParams(out, outer, recursive, mode, newPathStr, &val, filterMap) 518 } 519 } 520 } 521 if bj.Type == TpCodeArray && mode != "object" { 522 cnt := bj.GetElemCnt() 523 for i := 0; i < cnt; i++ { 524 val := bj.getArrayElem(i) 525 newPathStr := fmt.Sprintf("%s[%d]", pathStr, i) 526 tmp := UnnestResult{} 527 genUnnestResult(tmp, string2Slice(strconv.Itoa(i)), nil, string2Slice(newPathStr), &val, this, filterMap) 528 out = append(out, tmp) 529 if val.canUnnest() && recursive { 530 out = val.unnestWithParams(out, outer, recursive, mode, newPathStr, &val, filterMap) 531 } 532 } 533 } 534 return out 535 } 536 537 func (bj ByteJson) unnest(out []UnnestResult, path *Path, outer, recursive bool, mode string, filterMap map[string]struct{}) ([]UnnestResult, error) { 538 539 keys := make([]string, 0, 1) 540 vals := make([]ByteJson, 0, 1) 541 keys, vals = bj.queryWithSubPath(keys, vals, path, "$") 542 if len(keys) != len(vals) { 543 return nil, moerr.NewInvalidInputNoCtx("len(key) and len(val) are not equal, len(key)=%d, len(val)=%d", len(keys), len(vals)) 544 } 545 for i := 0; i < len(keys); i++ { 546 if vals[i].canUnnest() { 547 out = vals[i].unnestWithParams(out, outer, recursive, mode, keys[i], &vals[i], filterMap) 548 } 549 } 550 if len(out) == 0 && outer { 551 for i := 0; i < len(keys); i++ { 552 tmp := UnnestResult{} 553 out = append(out, tmp) 554 } 555 if _, ok := filterMap["path"]; ok { 556 for i := 0; i < len(keys); i++ { 557 out[i]["path"] = string2Slice(keys[i]) 558 } 559 } 560 if _, ok := filterMap["this"]; ok { 561 for i := 0; i < len(vals); i++ { 562 dt, err := vals[i].Marshal() 563 if err != nil { 564 return nil, err 565 } 566 out[i]["this"] = dt 567 } 568 } 569 570 } 571 return out, nil 572 } 573 574 // Unnest returns a slice of UnnestResult, each UnnestResult contains filtered data, if param filters is nil, return all fields. 575 func (bj ByteJson) Unnest(path *Path, outer, recursive bool, mode string, filterMap map[string]struct{}) ([]UnnestResult, error) { 576 if !checkMode(mode) { 577 return nil, moerr.NewInvalidInputNoCtx("mode must be one of [object, array, both]") 578 } 579 out := make([]UnnestResult, 0, 1) 580 out, err := bj.unnest(out, path, outer, recursive, mode, filterMap) 581 return out, err 582 } 583 584 func genUnnestResult(res UnnestResult, index, key, path []byte, value, this *ByteJson, filterMap map[string]struct{}) UnnestResult { 585 if _, ok := filterMap["index"]; ok { 586 res["index"] = index 587 } 588 if _, ok := filterMap["key"]; ok { 589 res["key"] = key 590 } 591 if _, ok := filterMap["path"]; ok { 592 res["path"] = path 593 } 594 if _, ok := filterMap["value"]; ok { 595 dt, _ := value.Marshal() 596 res["value"] = dt 597 } 598 if _, ok := filterMap["this"]; ok { 599 dt, _ := this.Marshal() 600 res["this"] = dt 601 } 602 return res 603 }