github.com/matrixorigin/matrixone@v1.2.0/pkg/container/bytejson/utils.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bytejson
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"encoding/json"
    21  	"math"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  	"unicode/utf8"
    26  
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/common/util"
    29  )
    30  
    31  func ParseFromString(s string) (ret ByteJson, err error) {
    32  	if len(s) == 0 {
    33  		err = moerr.NewInvalidInputNoCtx("json text %s", s)
    34  		return
    35  	}
    36  	data := util.UnsafeStringToBytes(s)
    37  	ret, err = ParseFromByteSlice(data)
    38  	return
    39  }
    40  func ParseFromByteSlice(s []byte) (bj ByteJson, err error) {
    41  	if len(s) == 0 {
    42  		err = moerr.NewInvalidInputNoCtx("json text %s", string(s))
    43  		return
    44  	}
    45  	if !json.Valid(s) {
    46  		err = moerr.NewInvalidInputNoCtx("json text %s", string(s))
    47  		return
    48  	}
    49  	err = bj.UnmarshalJSON(s)
    50  	return
    51  }
    52  
    53  func toString(buf, data []byte) []byte {
    54  	return strconv.AppendQuote(buf, string(data))
    55  }
    56  
    57  func addElem(buf []byte, in interface{}) (TpCode, []byte, error) {
    58  	var (
    59  		tpCode TpCode
    60  		err    error
    61  	)
    62  	switch x := in.(type) {
    63  	case nil:
    64  		tpCode = TpCodeLiteral
    65  		buf = append(buf, LiteralNull)
    66  	case bool:
    67  		tpCode = TpCodeLiteral
    68  		lit := LiteralFalse
    69  		if x {
    70  			lit = LiteralTrue
    71  		}
    72  		buf = append(buf, lit)
    73  	case int64:
    74  		tpCode = TpCodeInt64
    75  		buf = addUint64(buf, uint64(x))
    76  	case uint64:
    77  		tpCode = TpCodeUint64
    78  		buf = addUint64(buf, x)
    79  	case json.Number:
    80  		tpCode, buf, err = addJsonNumber(buf, x)
    81  	case string:
    82  		tpCode = TpCodeString
    83  		buf = addString(buf, x)
    84  	case ByteJson:
    85  		tpCode = x.Type
    86  		buf = append(buf, x.Data...)
    87  	case []interface{}:
    88  		tpCode = TpCodeArray
    89  		buf, err = addArray(buf, x)
    90  	case map[string]interface{}:
    91  		tpCode = TpCodeObject
    92  		buf, err = addObject(buf, x)
    93  	default:
    94  		return tpCode, nil, moerr.NewInvalidInputNoCtx("json element %v", in)
    95  	}
    96  	return tpCode, buf, err
    97  }
    98  
    99  // extend slice to have n zero bytes
   100  func extendByte(buf []byte, n int) []byte {
   101  	buf = append(buf, make([]byte, n)...)
   102  	return buf
   103  }
   104  
   105  // add a uint64 to slice
   106  func addUint64(buf []byte, x uint64) []byte {
   107  	off := len(buf)
   108  	buf = extendByte(buf, numberSize)
   109  	endian.PutUint64(buf[off:], x)
   110  	return buf
   111  }
   112  
   113  func addInt64(buf []byte, x int64) []byte {
   114  	return addUint64(buf, uint64(x))
   115  }
   116  
   117  func addFloat64(buf []byte, num float64) []byte {
   118  	off := len(buf)
   119  	buf = extendByte(buf, numberSize)
   120  	endian.PutUint64(buf[off:], math.Float64bits(num))
   121  	return buf
   122  }
   123  func addString(buf []byte, in string) []byte {
   124  	off := len(buf)
   125  	//encoding length
   126  	buf = extendByte(buf, binary.MaxVarintLen64)
   127  	inLen := binary.PutUvarint(buf[off:], uint64(len(in)))
   128  	//cut length
   129  	buf = buf[:off+inLen]
   130  	//add string
   131  	buf = append(buf, in...)
   132  	return buf
   133  }
   134  
   135  func addKeyEntry(buf []byte, start, keyOff int, key string) ([]byte, error) {
   136  	keyLen := uint32(len(key))
   137  	if keyLen > math.MaxUint16 {
   138  		return nil, moerr.NewInvalidInputNoCtx("json key %s", key)
   139  	}
   140  	//put key offset
   141  	endian.PutUint32(buf[start:], uint32(keyOff))
   142  	//put key length
   143  	endian.PutUint16(buf[start+keyOriginOff:], uint16(keyLen))
   144  	buf = append(buf, key...)
   145  	return buf, nil
   146  }
   147  
   148  func addObject(buf []byte, in map[string]interface{}) ([]byte, error) {
   149  	off := len(buf)
   150  	buf = addUint32(buf, uint32(len(in)))
   151  	objStart := len(buf)
   152  	buf = extendByte(buf, docSizeOff)
   153  	keyEntryStart := len(buf)
   154  	buf = extendByte(buf, len(in)*keyEntrySize)
   155  	valEntryStart := len(buf)
   156  	buf = extendByte(buf, len(in)*valEntrySize)
   157  	kvs := make([]kv, 0, len(in))
   158  	for k, v := range in {
   159  		kvs = append(kvs, kv{k, v})
   160  	}
   161  	sort.Slice(kvs, func(i, j int) bool {
   162  		return kvs[i].key < kvs[j].key
   163  	})
   164  	for i, kv := range kvs {
   165  		start := keyEntryStart + i*keyEntrySize
   166  		keyOff := len(buf) - off
   167  		var err error
   168  		buf, err = addKeyEntry(buf, start, keyOff, kv.key)
   169  		if err != nil {
   170  			return nil, err
   171  		}
   172  	}
   173  	for i, kv := range kvs {
   174  		var err error
   175  		valEntryOff := valEntryStart + i*valEntrySize
   176  		buf, err = addValEntry(buf, off, valEntryOff, kv.val)
   177  		if err != nil {
   178  			return nil, err
   179  		}
   180  	}
   181  	endian.PutUint32(buf[objStart:], uint32(len(buf)-off))
   182  	return buf, nil
   183  }
   184  func addArray(buf []byte, in []interface{}) ([]byte, error) {
   185  	off := len(buf)
   186  	buf = addUint32(buf, uint32(len(in)))
   187  	arrSizeStart := len(buf)
   188  	buf = extendByte(buf, docSizeOff)
   189  	valEntryStart := len(buf)
   190  	buf = extendByte(buf, len(in)*valEntrySize)
   191  	for i, v := range in {
   192  		var err error
   193  		buf, err = addValEntry(buf, off, valEntryStart+i*valEntrySize, v)
   194  		if err != nil {
   195  			return nil, err
   196  		}
   197  	}
   198  	arrSize := len(buf) - off
   199  	endian.PutUint32(buf[arrSizeStart:], uint32(arrSize))
   200  	return buf, nil
   201  }
   202  
   203  func addValEntry(buf []byte, bufStart, entryStart int, in interface{}) ([]byte, error) {
   204  	valStart := len(buf)
   205  	tpCode, buf, err := addElem(buf, in)
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	switch tpCode {
   210  	case TpCodeLiteral:
   211  		lit := buf[valStart]
   212  		buf = buf[:valStart]
   213  		buf[entryStart] = byte(TpCodeLiteral)
   214  		buf[entryStart+1] = lit
   215  		return buf, nil
   216  	}
   217  	buf[entryStart] = byte(tpCode)
   218  	endian.PutUint32(buf[entryStart+1:], uint32(valStart-bufStart))
   219  	return buf, nil
   220  }
   221  
   222  func addUint32(buf []byte, x uint32) []byte {
   223  	off := len(buf)
   224  	buf = extendByte(buf, 4)
   225  	endian.PutUint32(buf[off:], x)
   226  	return buf
   227  }
   228  
   229  func checkFloat64(n float64) error {
   230  	if math.IsInf(n, 0) || math.IsNaN(n) {
   231  		return moerr.NewInvalidInputNoCtx("json float64 %f", n)
   232  	}
   233  	return nil
   234  }
   235  
   236  func addJsonNumber(buf []byte, in json.Number) (TpCode, []byte, error) {
   237  	//check if it is a float
   238  	if strings.ContainsAny(string(in), "Ee.") {
   239  		val, err := in.Float64()
   240  		if err != nil {
   241  			return TpCodeFloat64, nil, moerr.NewInvalidInputNoCtx("json number %v", in)
   242  		}
   243  		if err = checkFloat64(val); err != nil {
   244  			return TpCodeFloat64, nil, err
   245  		}
   246  		return TpCodeFloat64, addFloat64(buf, val), nil
   247  	}
   248  	if val, err := in.Int64(); err == nil { //check if it is an int
   249  		return TpCodeInt64, addInt64(buf, val), nil
   250  	}
   251  	if val, err := strconv.ParseUint(string(in), 10, 64); err == nil { //check if it is a uint
   252  		return TpCodeUint64, addUint64(buf, val), nil
   253  	}
   254  	if val, err := in.Float64(); err == nil { //check if it is a float
   255  		if err = checkFloat64(val); err != nil {
   256  			return TpCodeFloat64, nil, err
   257  		}
   258  		return TpCodeFloat64, addFloat64(buf, val), nil
   259  	}
   260  	var tpCode TpCode
   261  	return tpCode, nil, moerr.NewInvalidInputNoCtx("json number %v", in)
   262  }
   263  
   264  func calStrLen(buf []byte) (int, int) {
   265  	strLen, lenLen := uint64(buf[0]), 1
   266  	if strLen >= utf8.RuneSelf {
   267  		strLen, lenLen = binary.Uvarint(buf)
   268  	}
   269  	return int(strLen), lenLen
   270  }
   271  
   272  func isIdentifier(s string) bool {
   273  	if len(s) == 0 {
   274  		return false
   275  	}
   276  	for i := 0; i < len(s); i++ {
   277  		if (i != 0 && s[i] >= '0' && s[i] <= '9') ||
   278  			(s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') ||
   279  			s[i] == '_' || s[i] == '$' || s[i] >= 0x80 {
   280  			continue
   281  		}
   282  		return false
   283  	}
   284  	return true
   285  }
   286  
   287  func ParseJsonPath(path string) (p Path, err error) {
   288  	pg := newPathGenerator(path)
   289  	pg.trimSpace()
   290  	if !pg.hasNext() || pg.next() != '$' {
   291  		err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   292  	}
   293  	pg.trimSpace()
   294  	subPaths := make([]subPath, 0, 8)
   295  	var ok bool
   296  	for pg.hasNext() {
   297  		switch pg.front() {
   298  		case '.':
   299  			subPaths, ok = pg.generateKey(subPaths)
   300  		case '[':
   301  			subPaths, ok = pg.generateIndex(subPaths)
   302  		case '*':
   303  			subPaths, ok = pg.generateDoubleStar(subPaths)
   304  		default:
   305  			ok = false
   306  		}
   307  		if !ok {
   308  			err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   309  			return
   310  		}
   311  		pg.trimSpace()
   312  	}
   313  
   314  	if len(subPaths) > 0 && subPaths[len(subPaths)-1].tp == subPathDoubleStar {
   315  		err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   316  		return
   317  	}
   318  	p.init(subPaths)
   319  	return
   320  }
   321  
   322  func addByteElem(buf []byte, entryStart int, elems []ByteJson) []byte {
   323  	for i, elem := range elems {
   324  		buf[entryStart+i*valEntrySize] = byte(elem.Type)
   325  		if elem.Type == TpCodeLiteral {
   326  			buf[entryStart+i*valEntrySize+valTypeSize] = elem.Data[0]
   327  		} else {
   328  			endian.PutUint32(buf[entryStart+i*valEntrySize+valTypeSize:], uint32(len(buf)))
   329  			buf = append(buf, elem.Data...)
   330  		}
   331  	}
   332  	return buf
   333  }
   334  
   335  func mergeToArray(origin []ByteJson) *ByteJson {
   336  	totalSize := headerSize + len(origin)*valEntrySize
   337  	for _, el := range origin {
   338  		if el.Type != TpCodeLiteral {
   339  			totalSize += len(el.Data)
   340  		}
   341  	}
   342  	buf := make([]byte, headerSize+len(origin)*valEntrySize, totalSize)
   343  	endian.PutUint32(buf, uint32(len(origin)))
   344  	endian.PutUint32(buf[docSizeOff:], uint32(totalSize))
   345  	buf = addByteElem(buf, headerSize, origin)
   346  	return &ByteJson{Type: TpCodeArray, Data: buf}
   347  }
   348  
   349  // check unnest mode
   350  func checkMode(mode string) bool {
   351  	if mode == "both" || mode == "array" || mode == "object" {
   352  		return true
   353  	}
   354  	return false
   355  }
   356  
   357  func genIndexOrKey(pathStr string) ([]byte, []byte) {
   358  	if pathStr[len(pathStr)-1] == ']' {
   359  		// find last '['
   360  		idx := strings.LastIndex(pathStr, "[")
   361  		return util.UnsafeStringToBytes(pathStr[idx : len(pathStr)-1]), nil
   362  	}
   363  	// find last '.'
   364  	idx := strings.LastIndex(pathStr, ".")
   365  	return nil, util.UnsafeStringToBytes(pathStr[idx+1:])
   366  }
   367  
   368  // for test
   369  func (r UnnestResult) String() string {
   370  	var buf bytes.Buffer
   371  	if val, ok := r["key"]; ok && val != nil {
   372  		buf.WriteString("key: ")
   373  		buf.WriteString(string(val) + ", ")
   374  	}
   375  	if val, ok := r["path"]; ok && val != nil {
   376  		buf.WriteString("path: ")
   377  		buf.WriteString(string(val) + ", ")
   378  	}
   379  	if val, ok := r["index"]; ok && val != nil {
   380  		buf.WriteString("index: ")
   381  		buf.WriteString(string(val) + ", ")
   382  	}
   383  	if val, ok := r["value"]; ok && val != nil {
   384  		buf.WriteString("value: ")
   385  		bj := ByteJson{}
   386  		bj.Unmarshal(val)
   387  		val, _ = bj.MarshalJSON()
   388  		buf.WriteString(string(val) + ", ")
   389  	}
   390  	if val, ok := r["this"]; ok && val != nil {
   391  		buf.WriteString("this: ")
   392  		bj := ByteJson{}
   393  		bj.Unmarshal(val)
   394  		val, _ = bj.MarshalJSON()
   395  		buf.WriteString(string(val))
   396  	}
   397  	return buf.String()
   398  }
   399  
   400  func checkAllNull(vals []ByteJson) bool {
   401  	allNull := true
   402  	for _, val := range vals {
   403  		if !val.IsNull() {
   404  			allNull = false
   405  			break
   406  		}
   407  	}
   408  	return allNull
   409  }