github.com/matrixorigin/matrixone@v0.7.0/pkg/container/bytejson/utils.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bytejson 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "encoding/json" 21 "math" 22 "reflect" 23 "sort" 24 "strconv" 25 "strings" 26 "unicode/utf8" 27 "unsafe" 28 29 "github.com/matrixorigin/matrixone/pkg/common/moerr" 30 ) 31 32 func ParseFromString(s string) (ret ByteJson, err error) { 33 if len(s) == 0 { 34 err = moerr.NewInvalidInputNoCtx("json text %s", s) 35 return 36 } 37 data := string2Slice(s) 38 ret, err = ParseFromByteSlice(data) 39 return 40 } 41 func ParseFromByteSlice(s []byte) (bj ByteJson, err error) { 42 if len(s) == 0 { 43 err = moerr.NewInvalidInputNoCtx("json text %s", string(s)) 44 return 45 } 46 if !json.Valid(s) { 47 err = moerr.NewInvalidInputNoCtx("json text %s", string(s)) 48 return 49 } 50 err = bj.UnmarshalJSON(s) 51 return 52 } 53 54 func toString(buf, data []byte) []byte { 55 return strconv.AppendQuote(buf, string(data)) 56 } 57 58 func addElem(buf []byte, in interface{}) (TpCode, []byte, error) { 59 var ( 60 tpCode TpCode 61 err error 62 ) 63 switch x := in.(type) { 64 case nil: 65 tpCode = TpCodeLiteral 66 buf = append(buf, LiteralNull) 67 case bool: 68 tpCode = TpCodeLiteral 69 lit := LiteralFalse 70 if x { 71 lit = LiteralTrue 72 } 73 buf = append(buf, lit) 74 case int64: 75 tpCode = TpCodeInt64 76 buf = addUint64(buf, uint64(x)) 77 case uint64: 78 tpCode = TpCodeUint64 79 buf = addUint64(buf, x) 80 case json.Number: 81 tpCode, buf, err = addJsonNumber(buf, x) 82 case string: 83 tpCode = TpCodeString 84 buf = addString(buf, x) 85 case ByteJson: 86 tpCode = x.Type 87 buf = append(buf, x.Data...) 88 case []interface{}: 89 tpCode = TpCodeArray 90 buf, err = addArray(buf, x) 91 case map[string]interface{}: 92 tpCode = TpCodeObject 93 buf, err = addObject(buf, x) 94 default: 95 return tpCode, nil, moerr.NewInvalidInputNoCtx("json element %v", in) 96 } 97 return tpCode, buf, err 98 } 99 100 // extend slice to have n zero bytes 101 func extendByte(buf []byte, n int) []byte { 102 buf = append(buf, make([]byte, n)...) 103 return buf 104 } 105 106 // add a uint64 to slice 107 func addUint64(buf []byte, x uint64) []byte { 108 off := len(buf) 109 buf = extendByte(buf, numberSize) 110 endian.PutUint64(buf[off:], x) 111 return buf 112 } 113 114 func addInt64(buf []byte, x int64) []byte { 115 return addUint64(buf, uint64(x)) 116 } 117 118 func addFloat64(buf []byte, num float64) []byte { 119 off := len(buf) 120 buf = extendByte(buf, numberSize) 121 endian.PutUint64(buf[off:], math.Float64bits(num)) 122 return buf 123 } 124 func addString(buf []byte, in string) []byte { 125 off := len(buf) 126 //encoding length 127 buf = extendByte(buf, binary.MaxVarintLen64) 128 inLen := binary.PutUvarint(buf[off:], uint64(len(in))) 129 //cut length 130 buf = buf[:off+inLen] 131 //add string 132 buf = append(buf, in...) 133 return buf 134 } 135 136 func addKeyEntry(buf []byte, start, keyOff int, key string) ([]byte, error) { 137 keyLen := uint32(len(key)) 138 if keyLen > math.MaxUint16 { 139 return nil, moerr.NewInvalidInputNoCtx("json key %s", key) 140 } 141 //put key offset 142 endian.PutUint32(buf[start:], uint32(keyOff)) 143 //put key length 144 endian.PutUint16(buf[start+keyOriginOff:], uint16(keyLen)) 145 buf = append(buf, key...) 146 return buf, nil 147 } 148 149 func addObject(buf []byte, in map[string]interface{}) ([]byte, error) { 150 off := len(buf) 151 buf = addUint32(buf, uint32(len(in))) 152 objStart := len(buf) 153 buf = extendByte(buf, docSizeOff) 154 keyEntryStart := len(buf) 155 buf = extendByte(buf, len(in)*keyEntrySize) 156 valEntryStart := len(buf) 157 buf = extendByte(buf, len(in)*valEntrySize) 158 kvs := make([]kv, 0, len(in)) 159 for k, v := range in { 160 kvs = append(kvs, kv{k, v}) 161 } 162 sort.Slice(kvs, func(i, j int) bool { 163 return kvs[i].key < kvs[j].key 164 }) 165 for i, kv := range kvs { 166 start := keyEntryStart + i*keyEntrySize 167 keyOff := len(buf) - off 168 var err error 169 buf, err = addKeyEntry(buf, start, keyOff, kv.key) 170 if err != nil { 171 return nil, err 172 } 173 } 174 for i, kv := range kvs { 175 var err error 176 valEntryOff := valEntryStart + i*valEntrySize 177 buf, err = addValEntry(buf, off, valEntryOff, kv.val) 178 if err != nil { 179 return nil, err 180 } 181 } 182 endian.PutUint32(buf[objStart:], uint32(len(buf)-off)) 183 return buf, nil 184 } 185 func addArray(buf []byte, in []interface{}) ([]byte, error) { 186 off := len(buf) 187 buf = addUint32(buf, uint32(len(in))) 188 arrSizeStart := len(buf) 189 buf = extendByte(buf, docSizeOff) 190 valEntryStart := len(buf) 191 buf = extendByte(buf, len(in)*valEntrySize) 192 for i, v := range in { 193 var err error 194 buf, err = addValEntry(buf, off, valEntryStart+i*valEntrySize, v) 195 if err != nil { 196 return nil, err 197 } 198 } 199 arrSize := len(buf) - off 200 endian.PutUint32(buf[arrSizeStart:], uint32(arrSize)) 201 return buf, nil 202 } 203 204 func addValEntry(buf []byte, bufStart, entryStart int, in interface{}) ([]byte, error) { 205 valStart := len(buf) 206 tpCode, buf, err := addElem(buf, in) 207 if err != nil { 208 return nil, err 209 } 210 switch tpCode { 211 case TpCodeLiteral: 212 lit := buf[valStart] 213 buf = buf[:valStart] 214 buf[entryStart] = byte(TpCodeLiteral) 215 buf[entryStart+1] = lit 216 return buf, nil 217 } 218 buf[entryStart] = byte(tpCode) 219 endian.PutUint32(buf[entryStart+1:], uint32(valStart-bufStart)) 220 return buf, nil 221 } 222 223 func addUint32(buf []byte, x uint32) []byte { 224 off := len(buf) 225 buf = extendByte(buf, 4) 226 endian.PutUint32(buf[off:], x) 227 return buf 228 } 229 230 func checkFloat64(n float64) error { 231 if math.IsInf(n, 0) || math.IsNaN(n) { 232 return moerr.NewInvalidInputNoCtx("json float64 %f", n) 233 } 234 return nil 235 } 236 237 func addJsonNumber(buf []byte, in json.Number) (TpCode, []byte, error) { 238 //check if it is a float 239 if strings.ContainsAny(string(in), "Ee.") { 240 val, err := in.Float64() 241 if err != nil { 242 return TpCodeFloat64, nil, moerr.NewInvalidInputNoCtx("json number %v", in) 243 } 244 if err = checkFloat64(val); err != nil { 245 return TpCodeFloat64, nil, err 246 } 247 return TpCodeFloat64, addFloat64(buf, val), nil 248 } 249 if val, err := in.Int64(); err == nil { //check if it is an int 250 return TpCodeInt64, addInt64(buf, val), nil 251 } 252 if val, err := strconv.ParseUint(string(in), 10, 64); err == nil { //check if it is a uint 253 return TpCodeUint64, addUint64(buf, val), nil 254 } 255 if val, err := in.Float64(); err == nil { //check if it is a float 256 if err = checkFloat64(val); err != nil { 257 return TpCodeFloat64, nil, err 258 } 259 return TpCodeFloat64, addFloat64(buf, val), nil 260 } 261 var tpCode TpCode 262 return tpCode, nil, moerr.NewInvalidInputNoCtx("json number %v", in) 263 } 264 func string2Slice(s string) []byte { 265 str := (*reflect.StringHeader)(unsafe.Pointer(&s)) 266 var ret []byte 267 retPtr := (*reflect.SliceHeader)(unsafe.Pointer(&ret)) 268 retPtr.Data = str.Data 269 retPtr.Len = str.Len 270 retPtr.Cap = str.Len 271 return ret 272 } 273 func calStrLen(buf []byte) (int, int) { 274 strLen, lenLen := uint64(buf[0]), 1 275 if strLen >= utf8.RuneSelf { 276 strLen, lenLen = binary.Uvarint(buf) 277 } 278 return int(strLen), lenLen 279 } 280 func isIdentifier(s string) bool { 281 if len(s) == 0 { 282 return false 283 } 284 for i := 0; i < len(s); i++ { 285 if (i != 0 && s[i] >= '0' && s[i] <= '9') || 286 (s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') || 287 s[i] == '_' || s[i] == '$' || s[i] >= 0x80 { 288 continue 289 } 290 return false 291 } 292 return true 293 } 294 295 func ParseJsonPath(path string) (p Path, err error) { 296 pg := newPathGenerator(path) 297 pg.trimSpace() 298 if !pg.hasNext() || pg.next() != '$' { 299 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 300 } 301 pg.trimSpace() 302 subPaths := make([]subPath, 0, 8) 303 var ok bool 304 for pg.hasNext() { 305 switch pg.front() { 306 case '.': 307 subPaths, ok = pg.generateKey(subPaths) 308 case '[': 309 subPaths, ok = pg.generateIndex(subPaths) 310 case '*': 311 subPaths, ok = pg.generateDoubleStar(subPaths) 312 default: 313 ok = false 314 } 315 if !ok { 316 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 317 return 318 } 319 pg.trimSpace() 320 } 321 322 if len(subPaths) > 0 && subPaths[len(subPaths)-1].tp == subPathDoubleStar { 323 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 324 return 325 } 326 p.init(subPaths) 327 return 328 } 329 330 func addByteElem(buf []byte, entryStart int, elems []ByteJson) []byte { 331 for i, elem := range elems { 332 buf[entryStart+i*valEntrySize] = byte(elem.Type) 333 if elem.Type == TpCodeLiteral { 334 buf[entryStart+i*valEntrySize+valTypeSize] = elem.Data[0] 335 } else { 336 endian.PutUint32(buf[entryStart+i*valEntrySize+valTypeSize:], uint32(len(buf))) 337 buf = append(buf, elem.Data...) 338 } 339 } 340 return buf 341 } 342 343 func mergeToArray(origin []ByteJson) *ByteJson { 344 totalSize := headerSize + len(origin)*valEntrySize 345 for _, el := range origin { 346 if el.Type != TpCodeLiteral { 347 totalSize += len(el.Data) 348 } 349 } 350 buf := make([]byte, headerSize+len(origin)*valEntrySize, totalSize) 351 endian.PutUint32(buf, uint32(len(origin))) 352 endian.PutUint32(buf[docSizeOff:], uint32(totalSize)) 353 buf = addByteElem(buf, headerSize, origin) 354 return &ByteJson{Type: TpCodeArray, Data: buf} 355 } 356 357 // check unnest mode 358 func checkMode(mode string) bool { 359 if mode == "both" || mode == "array" || mode == "object" { 360 return true 361 } 362 return false 363 } 364 365 func genIndexOrKey(pathStr string) ([]byte, []byte) { 366 if pathStr[len(pathStr)-1] == ']' { 367 // find last '[' 368 idx := strings.LastIndex(pathStr, "[") 369 return string2Slice(pathStr[idx : len(pathStr)-1]), nil 370 } 371 // find last '.' 372 idx := strings.LastIndex(pathStr, ".") 373 return nil, string2Slice(pathStr[idx+1:]) 374 } 375 376 // for test 377 func (r UnnestResult) String() string { 378 var buf bytes.Buffer 379 if val, ok := r["key"]; ok && val != nil { 380 buf.WriteString("key: ") 381 buf.WriteString(string(val) + ", ") 382 } 383 if val, ok := r["path"]; ok && val != nil { 384 buf.WriteString("path: ") 385 buf.WriteString(string(val) + ", ") 386 } 387 if val, ok := r["index"]; ok && val != nil { 388 buf.WriteString("index: ") 389 buf.WriteString(string(val) + ", ") 390 } 391 if val, ok := r["value"]; ok && val != nil { 392 buf.WriteString("value: ") 393 bj := ByteJson{} 394 bj.Unmarshal(val) 395 val, _ = bj.MarshalJSON() 396 buf.WriteString(string(val) + ", ") 397 } 398 if val, ok := r["this"]; ok && val != nil { 399 buf.WriteString("this: ") 400 bj := ByteJson{} 401 bj.Unmarshal(val) 402 val, _ = bj.MarshalJSON() 403 buf.WriteString(string(val)) 404 } 405 return buf.String() 406 } 407 408 func checkAllNull(vals []ByteJson) bool { 409 allNull := true 410 for _, val := range vals { 411 if !val.IsNull() { 412 allNull = false 413 break 414 } 415 } 416 return allNull 417 }