github.com/matrixorigin/matrixone@v1.2.0/pkg/container/bytejson/utils.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bytejson 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "encoding/json" 21 "math" 22 "sort" 23 "strconv" 24 "strings" 25 "unicode/utf8" 26 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/common/util" 29 ) 30 31 func ParseFromString(s string) (ret ByteJson, err error) { 32 if len(s) == 0 { 33 err = moerr.NewInvalidInputNoCtx("json text %s", s) 34 return 35 } 36 data := util.UnsafeStringToBytes(s) 37 ret, err = ParseFromByteSlice(data) 38 return 39 } 40 func ParseFromByteSlice(s []byte) (bj ByteJson, err error) { 41 if len(s) == 0 { 42 err = moerr.NewInvalidInputNoCtx("json text %s", string(s)) 43 return 44 } 45 if !json.Valid(s) { 46 err = moerr.NewInvalidInputNoCtx("json text %s", string(s)) 47 return 48 } 49 err = bj.UnmarshalJSON(s) 50 return 51 } 52 53 func toString(buf, data []byte) []byte { 54 return strconv.AppendQuote(buf, string(data)) 55 } 56 57 func addElem(buf []byte, in interface{}) (TpCode, []byte, error) { 58 var ( 59 tpCode TpCode 60 err error 61 ) 62 switch x := in.(type) { 63 case nil: 64 tpCode = TpCodeLiteral 65 buf = append(buf, LiteralNull) 66 case bool: 67 tpCode = TpCodeLiteral 68 lit := LiteralFalse 69 if x { 70 lit = LiteralTrue 71 } 72 buf = append(buf, lit) 73 case int64: 74 tpCode = TpCodeInt64 75 buf = addUint64(buf, uint64(x)) 76 case uint64: 77 tpCode = TpCodeUint64 78 buf = addUint64(buf, x) 79 case json.Number: 80 tpCode, buf, err = addJsonNumber(buf, x) 81 case string: 82 tpCode = TpCodeString 83 buf = addString(buf, x) 84 case ByteJson: 85 tpCode = x.Type 86 buf = append(buf, x.Data...) 87 case []interface{}: 88 tpCode = TpCodeArray 89 buf, err = addArray(buf, x) 90 case map[string]interface{}: 91 tpCode = TpCodeObject 92 buf, err = addObject(buf, x) 93 default: 94 return tpCode, nil, moerr.NewInvalidInputNoCtx("json element %v", in) 95 } 96 return tpCode, buf, err 97 } 98 99 // extend slice to have n zero bytes 100 func extendByte(buf []byte, n int) []byte { 101 buf = append(buf, make([]byte, n)...) 102 return buf 103 } 104 105 // add a uint64 to slice 106 func addUint64(buf []byte, x uint64) []byte { 107 off := len(buf) 108 buf = extendByte(buf, numberSize) 109 endian.PutUint64(buf[off:], x) 110 return buf 111 } 112 113 func addInt64(buf []byte, x int64) []byte { 114 return addUint64(buf, uint64(x)) 115 } 116 117 func addFloat64(buf []byte, num float64) []byte { 118 off := len(buf) 119 buf = extendByte(buf, numberSize) 120 endian.PutUint64(buf[off:], math.Float64bits(num)) 121 return buf 122 } 123 func addString(buf []byte, in string) []byte { 124 off := len(buf) 125 //encoding length 126 buf = extendByte(buf, binary.MaxVarintLen64) 127 inLen := binary.PutUvarint(buf[off:], uint64(len(in))) 128 //cut length 129 buf = buf[:off+inLen] 130 //add string 131 buf = append(buf, in...) 132 return buf 133 } 134 135 func addKeyEntry(buf []byte, start, keyOff int, key string) ([]byte, error) { 136 keyLen := uint32(len(key)) 137 if keyLen > math.MaxUint16 { 138 return nil, moerr.NewInvalidInputNoCtx("json key %s", key) 139 } 140 //put key offset 141 endian.PutUint32(buf[start:], uint32(keyOff)) 142 //put key length 143 endian.PutUint16(buf[start+keyOriginOff:], uint16(keyLen)) 144 buf = append(buf, key...) 145 return buf, nil 146 } 147 148 func addObject(buf []byte, in map[string]interface{}) ([]byte, error) { 149 off := len(buf) 150 buf = addUint32(buf, uint32(len(in))) 151 objStart := len(buf) 152 buf = extendByte(buf, docSizeOff) 153 keyEntryStart := len(buf) 154 buf = extendByte(buf, len(in)*keyEntrySize) 155 valEntryStart := len(buf) 156 buf = extendByte(buf, len(in)*valEntrySize) 157 kvs := make([]kv, 0, len(in)) 158 for k, v := range in { 159 kvs = append(kvs, kv{k, v}) 160 } 161 sort.Slice(kvs, func(i, j int) bool { 162 return kvs[i].key < kvs[j].key 163 }) 164 for i, kv := range kvs { 165 start := keyEntryStart + i*keyEntrySize 166 keyOff := len(buf) - off 167 var err error 168 buf, err = addKeyEntry(buf, start, keyOff, kv.key) 169 if err != nil { 170 return nil, err 171 } 172 } 173 for i, kv := range kvs { 174 var err error 175 valEntryOff := valEntryStart + i*valEntrySize 176 buf, err = addValEntry(buf, off, valEntryOff, kv.val) 177 if err != nil { 178 return nil, err 179 } 180 } 181 endian.PutUint32(buf[objStart:], uint32(len(buf)-off)) 182 return buf, nil 183 } 184 func addArray(buf []byte, in []interface{}) ([]byte, error) { 185 off := len(buf) 186 buf = addUint32(buf, uint32(len(in))) 187 arrSizeStart := len(buf) 188 buf = extendByte(buf, docSizeOff) 189 valEntryStart := len(buf) 190 buf = extendByte(buf, len(in)*valEntrySize) 191 for i, v := range in { 192 var err error 193 buf, err = addValEntry(buf, off, valEntryStart+i*valEntrySize, v) 194 if err != nil { 195 return nil, err 196 } 197 } 198 arrSize := len(buf) - off 199 endian.PutUint32(buf[arrSizeStart:], uint32(arrSize)) 200 return buf, nil 201 } 202 203 func addValEntry(buf []byte, bufStart, entryStart int, in interface{}) ([]byte, error) { 204 valStart := len(buf) 205 tpCode, buf, err := addElem(buf, in) 206 if err != nil { 207 return nil, err 208 } 209 switch tpCode { 210 case TpCodeLiteral: 211 lit := buf[valStart] 212 buf = buf[:valStart] 213 buf[entryStart] = byte(TpCodeLiteral) 214 buf[entryStart+1] = lit 215 return buf, nil 216 } 217 buf[entryStart] = byte(tpCode) 218 endian.PutUint32(buf[entryStart+1:], uint32(valStart-bufStart)) 219 return buf, nil 220 } 221 222 func addUint32(buf []byte, x uint32) []byte { 223 off := len(buf) 224 buf = extendByte(buf, 4) 225 endian.PutUint32(buf[off:], x) 226 return buf 227 } 228 229 func checkFloat64(n float64) error { 230 if math.IsInf(n, 0) || math.IsNaN(n) { 231 return moerr.NewInvalidInputNoCtx("json float64 %f", n) 232 } 233 return nil 234 } 235 236 func addJsonNumber(buf []byte, in json.Number) (TpCode, []byte, error) { 237 //check if it is a float 238 if strings.ContainsAny(string(in), "Ee.") { 239 val, err := in.Float64() 240 if err != nil { 241 return TpCodeFloat64, nil, moerr.NewInvalidInputNoCtx("json number %v", in) 242 } 243 if err = checkFloat64(val); err != nil { 244 return TpCodeFloat64, nil, err 245 } 246 return TpCodeFloat64, addFloat64(buf, val), nil 247 } 248 if val, err := in.Int64(); err == nil { //check if it is an int 249 return TpCodeInt64, addInt64(buf, val), nil 250 } 251 if val, err := strconv.ParseUint(string(in), 10, 64); err == nil { //check if it is a uint 252 return TpCodeUint64, addUint64(buf, val), nil 253 } 254 if val, err := in.Float64(); err == nil { //check if it is a float 255 if err = checkFloat64(val); err != nil { 256 return TpCodeFloat64, nil, err 257 } 258 return TpCodeFloat64, addFloat64(buf, val), nil 259 } 260 var tpCode TpCode 261 return tpCode, nil, moerr.NewInvalidInputNoCtx("json number %v", in) 262 } 263 264 func calStrLen(buf []byte) (int, int) { 265 strLen, lenLen := uint64(buf[0]), 1 266 if strLen >= utf8.RuneSelf { 267 strLen, lenLen = binary.Uvarint(buf) 268 } 269 return int(strLen), lenLen 270 } 271 272 func isIdentifier(s string) bool { 273 if len(s) == 0 { 274 return false 275 } 276 for i := 0; i < len(s); i++ { 277 if (i != 0 && s[i] >= '0' && s[i] <= '9') || 278 (s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') || 279 s[i] == '_' || s[i] == '$' || s[i] >= 0x80 { 280 continue 281 } 282 return false 283 } 284 return true 285 } 286 287 func ParseJsonPath(path string) (p Path, err error) { 288 pg := newPathGenerator(path) 289 pg.trimSpace() 290 if !pg.hasNext() || pg.next() != '$' { 291 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 292 } 293 pg.trimSpace() 294 subPaths := make([]subPath, 0, 8) 295 var ok bool 296 for pg.hasNext() { 297 switch pg.front() { 298 case '.': 299 subPaths, ok = pg.generateKey(subPaths) 300 case '[': 301 subPaths, ok = pg.generateIndex(subPaths) 302 case '*': 303 subPaths, ok = pg.generateDoubleStar(subPaths) 304 default: 305 ok = false 306 } 307 if !ok { 308 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 309 return 310 } 311 pg.trimSpace() 312 } 313 314 if len(subPaths) > 0 && subPaths[len(subPaths)-1].tp == subPathDoubleStar { 315 err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path) 316 return 317 } 318 p.init(subPaths) 319 return 320 } 321 322 func addByteElem(buf []byte, entryStart int, elems []ByteJson) []byte { 323 for i, elem := range elems { 324 buf[entryStart+i*valEntrySize] = byte(elem.Type) 325 if elem.Type == TpCodeLiteral { 326 buf[entryStart+i*valEntrySize+valTypeSize] = elem.Data[0] 327 } else { 328 endian.PutUint32(buf[entryStart+i*valEntrySize+valTypeSize:], uint32(len(buf))) 329 buf = append(buf, elem.Data...) 330 } 331 } 332 return buf 333 } 334 335 func mergeToArray(origin []ByteJson) *ByteJson { 336 totalSize := headerSize + len(origin)*valEntrySize 337 for _, el := range origin { 338 if el.Type != TpCodeLiteral { 339 totalSize += len(el.Data) 340 } 341 } 342 buf := make([]byte, headerSize+len(origin)*valEntrySize, totalSize) 343 endian.PutUint32(buf, uint32(len(origin))) 344 endian.PutUint32(buf[docSizeOff:], uint32(totalSize)) 345 buf = addByteElem(buf, headerSize, origin) 346 return &ByteJson{Type: TpCodeArray, Data: buf} 347 } 348 349 // check unnest mode 350 func checkMode(mode string) bool { 351 if mode == "both" || mode == "array" || mode == "object" { 352 return true 353 } 354 return false 355 } 356 357 func genIndexOrKey(pathStr string) ([]byte, []byte) { 358 if pathStr[len(pathStr)-1] == ']' { 359 // find last '[' 360 idx := strings.LastIndex(pathStr, "[") 361 return util.UnsafeStringToBytes(pathStr[idx : len(pathStr)-1]), nil 362 } 363 // find last '.' 364 idx := strings.LastIndex(pathStr, ".") 365 return nil, util.UnsafeStringToBytes(pathStr[idx+1:]) 366 } 367 368 // for test 369 func (r UnnestResult) String() string { 370 var buf bytes.Buffer 371 if val, ok := r["key"]; ok && val != nil { 372 buf.WriteString("key: ") 373 buf.WriteString(string(val) + ", ") 374 } 375 if val, ok := r["path"]; ok && val != nil { 376 buf.WriteString("path: ") 377 buf.WriteString(string(val) + ", ") 378 } 379 if val, ok := r["index"]; ok && val != nil { 380 buf.WriteString("index: ") 381 buf.WriteString(string(val) + ", ") 382 } 383 if val, ok := r["value"]; ok && val != nil { 384 buf.WriteString("value: ") 385 bj := ByteJson{} 386 bj.Unmarshal(val) 387 val, _ = bj.MarshalJSON() 388 buf.WriteString(string(val) + ", ") 389 } 390 if val, ok := r["this"]; ok && val != nil { 391 buf.WriteString("this: ") 392 bj := ByteJson{} 393 bj.Unmarshal(val) 394 val, _ = bj.MarshalJSON() 395 buf.WriteString(string(val)) 396 } 397 return buf.String() 398 } 399 400 func checkAllNull(vals []ByteJson) bool { 401 allNull := true 402 for _, val := range vals { 403 if !val.IsNull() { 404 allNull = false 405 break 406 } 407 } 408 return allNull 409 }