github.com/matrixorigin/matrixone@v0.7.0/pkg/container/bytejson/utils.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bytejson
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"encoding/json"
    21  	"math"
    22  	"reflect"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  	"unicode/utf8"
    27  	"unsafe"
    28  
    29  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    30  )
    31  
    32  func ParseFromString(s string) (ret ByteJson, err error) {
    33  	if len(s) == 0 {
    34  		err = moerr.NewInvalidInputNoCtx("json text %s", s)
    35  		return
    36  	}
    37  	data := string2Slice(s)
    38  	ret, err = ParseFromByteSlice(data)
    39  	return
    40  }
    41  func ParseFromByteSlice(s []byte) (bj ByteJson, err error) {
    42  	if len(s) == 0 {
    43  		err = moerr.NewInvalidInputNoCtx("json text %s", string(s))
    44  		return
    45  	}
    46  	if !json.Valid(s) {
    47  		err = moerr.NewInvalidInputNoCtx("json text %s", string(s))
    48  		return
    49  	}
    50  	err = bj.UnmarshalJSON(s)
    51  	return
    52  }
    53  
    54  func toString(buf, data []byte) []byte {
    55  	return strconv.AppendQuote(buf, string(data))
    56  }
    57  
    58  func addElem(buf []byte, in interface{}) (TpCode, []byte, error) {
    59  	var (
    60  		tpCode TpCode
    61  		err    error
    62  	)
    63  	switch x := in.(type) {
    64  	case nil:
    65  		tpCode = TpCodeLiteral
    66  		buf = append(buf, LiteralNull)
    67  	case bool:
    68  		tpCode = TpCodeLiteral
    69  		lit := LiteralFalse
    70  		if x {
    71  			lit = LiteralTrue
    72  		}
    73  		buf = append(buf, lit)
    74  	case int64:
    75  		tpCode = TpCodeInt64
    76  		buf = addUint64(buf, uint64(x))
    77  	case uint64:
    78  		tpCode = TpCodeUint64
    79  		buf = addUint64(buf, x)
    80  	case json.Number:
    81  		tpCode, buf, err = addJsonNumber(buf, x)
    82  	case string:
    83  		tpCode = TpCodeString
    84  		buf = addString(buf, x)
    85  	case ByteJson:
    86  		tpCode = x.Type
    87  		buf = append(buf, x.Data...)
    88  	case []interface{}:
    89  		tpCode = TpCodeArray
    90  		buf, err = addArray(buf, x)
    91  	case map[string]interface{}:
    92  		tpCode = TpCodeObject
    93  		buf, err = addObject(buf, x)
    94  	default:
    95  		return tpCode, nil, moerr.NewInvalidInputNoCtx("json element %v", in)
    96  	}
    97  	return tpCode, buf, err
    98  }
    99  
   100  // extend slice to have n zero bytes
   101  func extendByte(buf []byte, n int) []byte {
   102  	buf = append(buf, make([]byte, n)...)
   103  	return buf
   104  }
   105  
   106  // add a uint64 to slice
   107  func addUint64(buf []byte, x uint64) []byte {
   108  	off := len(buf)
   109  	buf = extendByte(buf, numberSize)
   110  	endian.PutUint64(buf[off:], x)
   111  	return buf
   112  }
   113  
   114  func addInt64(buf []byte, x int64) []byte {
   115  	return addUint64(buf, uint64(x))
   116  }
   117  
   118  func addFloat64(buf []byte, num float64) []byte {
   119  	off := len(buf)
   120  	buf = extendByte(buf, numberSize)
   121  	endian.PutUint64(buf[off:], math.Float64bits(num))
   122  	return buf
   123  }
   124  func addString(buf []byte, in string) []byte {
   125  	off := len(buf)
   126  	//encoding length
   127  	buf = extendByte(buf, binary.MaxVarintLen64)
   128  	inLen := binary.PutUvarint(buf[off:], uint64(len(in)))
   129  	//cut length
   130  	buf = buf[:off+inLen]
   131  	//add string
   132  	buf = append(buf, in...)
   133  	return buf
   134  }
   135  
   136  func addKeyEntry(buf []byte, start, keyOff int, key string) ([]byte, error) {
   137  	keyLen := uint32(len(key))
   138  	if keyLen > math.MaxUint16 {
   139  		return nil, moerr.NewInvalidInputNoCtx("json key %s", key)
   140  	}
   141  	//put key offset
   142  	endian.PutUint32(buf[start:], uint32(keyOff))
   143  	//put key length
   144  	endian.PutUint16(buf[start+keyOriginOff:], uint16(keyLen))
   145  	buf = append(buf, key...)
   146  	return buf, nil
   147  }
   148  
   149  func addObject(buf []byte, in map[string]interface{}) ([]byte, error) {
   150  	off := len(buf)
   151  	buf = addUint32(buf, uint32(len(in)))
   152  	objStart := len(buf)
   153  	buf = extendByte(buf, docSizeOff)
   154  	keyEntryStart := len(buf)
   155  	buf = extendByte(buf, len(in)*keyEntrySize)
   156  	valEntryStart := len(buf)
   157  	buf = extendByte(buf, len(in)*valEntrySize)
   158  	kvs := make([]kv, 0, len(in))
   159  	for k, v := range in {
   160  		kvs = append(kvs, kv{k, v})
   161  	}
   162  	sort.Slice(kvs, func(i, j int) bool {
   163  		return kvs[i].key < kvs[j].key
   164  	})
   165  	for i, kv := range kvs {
   166  		start := keyEntryStart + i*keyEntrySize
   167  		keyOff := len(buf) - off
   168  		var err error
   169  		buf, err = addKeyEntry(buf, start, keyOff, kv.key)
   170  		if err != nil {
   171  			return nil, err
   172  		}
   173  	}
   174  	for i, kv := range kvs {
   175  		var err error
   176  		valEntryOff := valEntryStart + i*valEntrySize
   177  		buf, err = addValEntry(buf, off, valEntryOff, kv.val)
   178  		if err != nil {
   179  			return nil, err
   180  		}
   181  	}
   182  	endian.PutUint32(buf[objStart:], uint32(len(buf)-off))
   183  	return buf, nil
   184  }
   185  func addArray(buf []byte, in []interface{}) ([]byte, error) {
   186  	off := len(buf)
   187  	buf = addUint32(buf, uint32(len(in)))
   188  	arrSizeStart := len(buf)
   189  	buf = extendByte(buf, docSizeOff)
   190  	valEntryStart := len(buf)
   191  	buf = extendByte(buf, len(in)*valEntrySize)
   192  	for i, v := range in {
   193  		var err error
   194  		buf, err = addValEntry(buf, off, valEntryStart+i*valEntrySize, v)
   195  		if err != nil {
   196  			return nil, err
   197  		}
   198  	}
   199  	arrSize := len(buf) - off
   200  	endian.PutUint32(buf[arrSizeStart:], uint32(arrSize))
   201  	return buf, nil
   202  }
   203  
   204  func addValEntry(buf []byte, bufStart, entryStart int, in interface{}) ([]byte, error) {
   205  	valStart := len(buf)
   206  	tpCode, buf, err := addElem(buf, in)
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  	switch tpCode {
   211  	case TpCodeLiteral:
   212  		lit := buf[valStart]
   213  		buf = buf[:valStart]
   214  		buf[entryStart] = byte(TpCodeLiteral)
   215  		buf[entryStart+1] = lit
   216  		return buf, nil
   217  	}
   218  	buf[entryStart] = byte(tpCode)
   219  	endian.PutUint32(buf[entryStart+1:], uint32(valStart-bufStart))
   220  	return buf, nil
   221  }
   222  
   223  func addUint32(buf []byte, x uint32) []byte {
   224  	off := len(buf)
   225  	buf = extendByte(buf, 4)
   226  	endian.PutUint32(buf[off:], x)
   227  	return buf
   228  }
   229  
   230  func checkFloat64(n float64) error {
   231  	if math.IsInf(n, 0) || math.IsNaN(n) {
   232  		return moerr.NewInvalidInputNoCtx("json float64 %f", n)
   233  	}
   234  	return nil
   235  }
   236  
   237  func addJsonNumber(buf []byte, in json.Number) (TpCode, []byte, error) {
   238  	//check if it is a float
   239  	if strings.ContainsAny(string(in), "Ee.") {
   240  		val, err := in.Float64()
   241  		if err != nil {
   242  			return TpCodeFloat64, nil, moerr.NewInvalidInputNoCtx("json number %v", in)
   243  		}
   244  		if err = checkFloat64(val); err != nil {
   245  			return TpCodeFloat64, nil, err
   246  		}
   247  		return TpCodeFloat64, addFloat64(buf, val), nil
   248  	}
   249  	if val, err := in.Int64(); err == nil { //check if it is an int
   250  		return TpCodeInt64, addInt64(buf, val), nil
   251  	}
   252  	if val, err := strconv.ParseUint(string(in), 10, 64); err == nil { //check if it is a uint
   253  		return TpCodeUint64, addUint64(buf, val), nil
   254  	}
   255  	if val, err := in.Float64(); err == nil { //check if it is a float
   256  		if err = checkFloat64(val); err != nil {
   257  			return TpCodeFloat64, nil, err
   258  		}
   259  		return TpCodeFloat64, addFloat64(buf, val), nil
   260  	}
   261  	var tpCode TpCode
   262  	return tpCode, nil, moerr.NewInvalidInputNoCtx("json number %v", in)
   263  }
   264  func string2Slice(s string) []byte {
   265  	str := (*reflect.StringHeader)(unsafe.Pointer(&s))
   266  	var ret []byte
   267  	retPtr := (*reflect.SliceHeader)(unsafe.Pointer(&ret))
   268  	retPtr.Data = str.Data
   269  	retPtr.Len = str.Len
   270  	retPtr.Cap = str.Len
   271  	return ret
   272  }
   273  func calStrLen(buf []byte) (int, int) {
   274  	strLen, lenLen := uint64(buf[0]), 1
   275  	if strLen >= utf8.RuneSelf {
   276  		strLen, lenLen = binary.Uvarint(buf)
   277  	}
   278  	return int(strLen), lenLen
   279  }
   280  func isIdentifier(s string) bool {
   281  	if len(s) == 0 {
   282  		return false
   283  	}
   284  	for i := 0; i < len(s); i++ {
   285  		if (i != 0 && s[i] >= '0' && s[i] <= '9') ||
   286  			(s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') ||
   287  			s[i] == '_' || s[i] == '$' || s[i] >= 0x80 {
   288  			continue
   289  		}
   290  		return false
   291  	}
   292  	return true
   293  }
   294  
   295  func ParseJsonPath(path string) (p Path, err error) {
   296  	pg := newPathGenerator(path)
   297  	pg.trimSpace()
   298  	if !pg.hasNext() || pg.next() != '$' {
   299  		err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   300  	}
   301  	pg.trimSpace()
   302  	subPaths := make([]subPath, 0, 8)
   303  	var ok bool
   304  	for pg.hasNext() {
   305  		switch pg.front() {
   306  		case '.':
   307  			subPaths, ok = pg.generateKey(subPaths)
   308  		case '[':
   309  			subPaths, ok = pg.generateIndex(subPaths)
   310  		case '*':
   311  			subPaths, ok = pg.generateDoubleStar(subPaths)
   312  		default:
   313  			ok = false
   314  		}
   315  		if !ok {
   316  			err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   317  			return
   318  		}
   319  		pg.trimSpace()
   320  	}
   321  
   322  	if len(subPaths) > 0 && subPaths[len(subPaths)-1].tp == subPathDoubleStar {
   323  		err = moerr.NewInvalidInputNoCtx("invalid json path '%s'", path)
   324  		return
   325  	}
   326  	p.init(subPaths)
   327  	return
   328  }
   329  
   330  func addByteElem(buf []byte, entryStart int, elems []ByteJson) []byte {
   331  	for i, elem := range elems {
   332  		buf[entryStart+i*valEntrySize] = byte(elem.Type)
   333  		if elem.Type == TpCodeLiteral {
   334  			buf[entryStart+i*valEntrySize+valTypeSize] = elem.Data[0]
   335  		} else {
   336  			endian.PutUint32(buf[entryStart+i*valEntrySize+valTypeSize:], uint32(len(buf)))
   337  			buf = append(buf, elem.Data...)
   338  		}
   339  	}
   340  	return buf
   341  }
   342  
   343  func mergeToArray(origin []ByteJson) *ByteJson {
   344  	totalSize := headerSize + len(origin)*valEntrySize
   345  	for _, el := range origin {
   346  		if el.Type != TpCodeLiteral {
   347  			totalSize += len(el.Data)
   348  		}
   349  	}
   350  	buf := make([]byte, headerSize+len(origin)*valEntrySize, totalSize)
   351  	endian.PutUint32(buf, uint32(len(origin)))
   352  	endian.PutUint32(buf[docSizeOff:], uint32(totalSize))
   353  	buf = addByteElem(buf, headerSize, origin)
   354  	return &ByteJson{Type: TpCodeArray, Data: buf}
   355  }
   356  
   357  // check unnest mode
   358  func checkMode(mode string) bool {
   359  	if mode == "both" || mode == "array" || mode == "object" {
   360  		return true
   361  	}
   362  	return false
   363  }
   364  
   365  func genIndexOrKey(pathStr string) ([]byte, []byte) {
   366  	if pathStr[len(pathStr)-1] == ']' {
   367  		// find last '['
   368  		idx := strings.LastIndex(pathStr, "[")
   369  		return string2Slice(pathStr[idx : len(pathStr)-1]), nil
   370  	}
   371  	// find last '.'
   372  	idx := strings.LastIndex(pathStr, ".")
   373  	return nil, string2Slice(pathStr[idx+1:])
   374  }
   375  
   376  // for test
   377  func (r UnnestResult) String() string {
   378  	var buf bytes.Buffer
   379  	if val, ok := r["key"]; ok && val != nil {
   380  		buf.WriteString("key: ")
   381  		buf.WriteString(string(val) + ", ")
   382  	}
   383  	if val, ok := r["path"]; ok && val != nil {
   384  		buf.WriteString("path: ")
   385  		buf.WriteString(string(val) + ", ")
   386  	}
   387  	if val, ok := r["index"]; ok && val != nil {
   388  		buf.WriteString("index: ")
   389  		buf.WriteString(string(val) + ", ")
   390  	}
   391  	if val, ok := r["value"]; ok && val != nil {
   392  		buf.WriteString("value: ")
   393  		bj := ByteJson{}
   394  		bj.Unmarshal(val)
   395  		val, _ = bj.MarshalJSON()
   396  		buf.WriteString(string(val) + ", ")
   397  	}
   398  	if val, ok := r["this"]; ok && val != nil {
   399  		buf.WriteString("this: ")
   400  		bj := ByteJson{}
   401  		bj.Unmarshal(val)
   402  		val, _ = bj.MarshalJSON()
   403  		buf.WriteString(string(val))
   404  	}
   405  	return buf.String()
   406  }
   407  
   408  func checkAllNull(vals []ByteJson) bool {
   409  	allNull := true
   410  	for _, val := range vals {
   411  		if !val.IsNull() {
   412  			allNull = false
   413  			break
   414  		}
   415  	}
   416  	return allNull
   417  }