github.com/cloudwego/kitex@v0.9.0/pkg/utils/json.go (about)

     1  /*
     2   * MIT License
     3   *
     4   * Copyright (c) 2016 json-iterator
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining a copy
     7   * of this software and associated documentation files (the "Software"), to deal
     8   * in the Software without restriction, including without limitation the rights
     9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    10   * copies of the Software, and to permit persons to whom the Software is
    11   * furnished to do so, subject to the following conditions:
    12   *
    13   * The above copyright notice and this permission notice shall be included in all
    14   * copies or substantial portions of the Software.
    15   *
    16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    19   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    22   * SOFTWARE.
    23   *
    24   * The source code of this file is written based on json-iterator,
    25   * all modifications are Copyright 2021 CloudWeGo Authors.
    26   */
    27  
    28  package utils
    29  
    30  import (
    31  	"errors"
    32  	"fmt"
    33  	"strings"
    34  	"unicode/utf16"
    35  	"unicode/utf8"
    36  	"unsafe"
    37  )
    38  
    39  // const of json keyword char
    40  const (
    41  	EmptyJSON  = "{}"
    42  	Comma      = ','
    43  	Colon      = ':'
    44  	DQuotation = '"'
    45  	LeftBrace  = '{'
    46  	RightBrace = '}'
    47  )
    48  
    49  const (
    50  	t1 = 0x00 // 0000 0000
    51  	tx = 0x80 // 1000 0000
    52  	t2 = 0xC0 // 1100 0000
    53  	t3 = 0xE0 // 1110 0000
    54  	t4 = 0xF0 // 1111 0000
    55  	t5 = 0xF8 // 1111 1000
    56  
    57  	maskx = 0x3F // 0011 1111
    58  
    59  	rune1Max = 1<<7 - 1
    60  	rune2Max = 1<<11 - 1
    61  	rune3Max = 1<<16 - 1
    62  
    63  	surrogateMin = 0xD800
    64  	surrogateMax = 0xDFFF
    65  
    66  	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
    67  	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
    68  
    69  	hex = "0123456789abcdef"
    70  )
    71  
    72  // Map2JSONStr transform map[string]string to json str, perf is better than use json lib directly
    73  func Map2JSONStr(mapInfo map[string]string) (str string, err error) {
    74  	defer func() {
    75  		if r := recover(); r != nil {
    76  			if e, ok := r.(error); ok {
    77  				err = fmt.Errorf("Map2JSONStr panic: %w", e)
    78  			} else {
    79  				err = fmt.Errorf("Map2JSONStr panic: %+v", r)
    80  			}
    81  		}
    82  	}()
    83  	size := len(mapInfo)
    84  	if mapInfo == nil || size == 0 {
    85  		return EmptyJSON, nil
    86  	}
    87  	// calculate actual byte size that avoid allocate mem multi times
    88  	idx := 0
    89  	byteSize := 2
    90  	for k, v := range mapInfo {
    91  		byteSize += len(k) + len(v) + 5
    92  		if idx++; idx < size {
    93  			byteSize++
    94  		}
    95  	}
    96  	var strBuilder strings.Builder
    97  	strBuilder.Grow(byteSize)
    98  	strBuilder.WriteByte(LeftBrace)
    99  	idx = 0
   100  	for k, v := range mapInfo {
   101  		wrapStrWithQuotation(k, &strBuilder)
   102  		strBuilder.WriteByte(Colon)
   103  		wrapStrWithQuotation(v, &strBuilder)
   104  		if idx++; idx < size {
   105  			strBuilder.WriteByte(Comma)
   106  		}
   107  	}
   108  	strBuilder.WriteByte(RightBrace)
   109  	return strBuilder.String(), nil
   110  }
   111  
   112  // JSONStr2Map transform json str to map[string]string, perf is better than use json lib directly
   113  func JSONStr2Map(jsonStr string) (mapInfo map[string]string, err error) {
   114  	defer func() {
   115  		if r := recover(); r != nil {
   116  			if e, ok := r.(error); ok {
   117  				err = fmt.Errorf("JSONStr2Map panic: %w", e)
   118  			} else {
   119  				err = fmt.Errorf("JSONStr2Map panic: %+v", r)
   120  			}
   121  		}
   122  	}()
   123  	data := []byte(jsonStr)
   124  	size := len(data)
   125  	lastIdx := size - 1
   126  	idx := 0
   127  	var c byte
   128  	if c, idx, err = nextToken(data, idx, lastIdx); err != nil {
   129  		return
   130  	}
   131  	var isNull bool
   132  	if idx, isNull = checkNull(c, data, idx, lastIdx); isNull {
   133  		return
   134  	}
   135  	if c != LeftBrace || data[size-1] != RightBrace {
   136  		err = fmt.Errorf("json str is invalid")
   137  		return
   138  	}
   139  	if ch, _, _ := nextToken(data, idx, lastIdx); ch == RightBrace {
   140  		return
   141  	}
   142  
   143  	mapInfo = make(map[string]string)
   144  	for ; c == Comma || c == LeftBrace; c, idx, err = nextToken(data, idx, lastIdx) {
   145  		if err != nil {
   146  			err = fmt.Errorf("json str is invalid")
   147  			return
   148  		}
   149  		var key, val string
   150  		if key, idx, err = readString(data, idx, lastIdx); err != nil {
   151  			return
   152  		}
   153  		if c, idx, err = nextToken(data, idx, lastIdx); c != ':' || err != nil {
   154  			err = fmt.Errorf("json str is invalid, expect ':' after object field, but found %s", string(c))
   155  			return
   156  		}
   157  		if val, idx, err = readString(data, idx, lastIdx); err != nil {
   158  			return
   159  		}
   160  		mapInfo[key] = val
   161  	}
   162  	return mapInfo, err
   163  }
   164  
   165  func readString(buf []byte, idx, lastIdx int) (string, int, error) {
   166  	var err error
   167  	var c byte
   168  	var isNull bool
   169  	if c, idx, err = nextToken(buf, idx, lastIdx); err != nil {
   170  		return "", idx, err
   171  	}
   172  	var str []byte
   173  	if c == '"' {
   174  		start := idx
   175  		noESC := true
   176  		for idx <= lastIdx {
   177  			if c, idx, err = readByte(buf, idx, lastIdx); err != nil {
   178  				return "", idx, err
   179  			}
   180  			switch c {
   181  			case '"':
   182  				if start < idx-1 {
   183  					if noESC {
   184  						str = buf[start : idx-1]
   185  					} else {
   186  						str = append(str, buf[start:idx-1]...)
   187  					}
   188  				}
   189  				return *(*string)(unsafe.Pointer(&str)), idx, nil
   190  			case '\\':
   191  				if start < idx-1 {
   192  					if noESC {
   193  						str = buf[start : idx-1]
   194  					} else {
   195  						str = append(str, buf[start:idx-1]...)
   196  					}
   197  				}
   198  				if c, idx, err = readByte(buf, idx, lastIdx); err != nil {
   199  					return "", idx, err
   200  				}
   201  				if str, idx, err = readEscapedChar(c, buf, idx, str, lastIdx); err != nil {
   202  					return "", 0, err
   203  				}
   204  				start = idx
   205  				noESC = false
   206  			}
   207  		}
   208  	} else if idx, isNull = checkNull(c, buf, idx, lastIdx); isNull {
   209  		return "", idx, nil
   210  	}
   211  	err = fmt.Errorf("json str is invalid, expects '\"' or n, but found %s", string(c))
   212  	return *(*string)(unsafe.Pointer(&str)), idx, err
   213  }
   214  
   215  func readByte(buf []byte, idx, lastIdx int) (byte, int, error) {
   216  	if lastIdx < idx {
   217  		return 0, -1, fmt.Errorf("readByte no more data")
   218  	}
   219  	c := buf[idx]
   220  	idx++
   221  	return c, idx, nil
   222  }
   223  
   224  func nextToken(buf []byte, idx, lastIdx int) (byte, int, error) {
   225  	if lastIdx < idx {
   226  		return 0, -1, errors.New("nextToken no more data")
   227  	}
   228  	var c byte
   229  	for idx <= lastIdx {
   230  		c = buf[idx]
   231  		idx++
   232  		switch c {
   233  		case ' ', '\n', '\t', '\r':
   234  			continue
   235  		}
   236  		return c, idx, nil
   237  	}
   238  	return c, idx, nil
   239  }
   240  
   241  func checkNull(c byte, data []byte, idx, lastIdx int) (int, bool) {
   242  	if c == 'n' {
   243  		ch, idx, _ := readByte(data, idx, lastIdx)
   244  		if ch != 'u' {
   245  			idx--
   246  			return idx, false
   247  		}
   248  		ch, idx, _ = readByte(data, idx, lastIdx)
   249  		if ch != 'l' {
   250  			idx--
   251  			return idx, false
   252  		}
   253  		ch, idx, _ = readByte(data, idx, lastIdx)
   254  		if ch != 'l' {
   255  			idx--
   256  			return idx, false
   257  		}
   258  		return idx, true
   259  	}
   260  	return idx, false
   261  }
   262  
   263  func readU4(buf []byte, idx, lastIdx int) (rune, int, error) {
   264  	var err error
   265  	var ret rune
   266  	for i := 0; i < 4; i++ {
   267  		var c byte
   268  		if c, idx, err = readByte(buf, idx, lastIdx); err != nil {
   269  			return ret, idx, err
   270  		}
   271  		if c >= '0' && c <= '9' {
   272  			ret = ret*16 + rune(c-'0')
   273  		} else if c >= 'a' && c <= 'f' {
   274  			ret = ret*16 + rune(c-'a'+10)
   275  		} else if c >= 'A' && c <= 'F' {
   276  			ret = ret*16 + rune(c-'A'+10)
   277  		} else {
   278  			return ret, idx, fmt.Errorf("unicode invalid: expects 0~9 or a~f, but found %v", string([]byte{c}))
   279  		}
   280  	}
   281  	return ret, idx, nil
   282  }
   283  
   284  // refer to json-iterator/go/iter_str readEscapedChar
   285  func readEscapedChar(c byte, buf []byte, idx int, str []byte, lastIdx int) ([]byte, int, error) {
   286  	var err error
   287  	switch c {
   288  	case 'u':
   289  		var r rune
   290  		if r, idx, err = readU4(buf, idx, lastIdx); err != nil {
   291  			return str, idx, err
   292  		}
   293  		// 是否是扩展字符
   294  		if utf16.IsSurrogate(r) {
   295  			if c, idx, err = readByte(buf, idx, lastIdx); err != nil {
   296  				return str, idx, err
   297  			}
   298  			if c != '\\' {
   299  				idx--
   300  				str = appendRune(str, r)
   301  				return str, idx, nil
   302  			}
   303  			if c, idx, err = readByte(buf, idx, lastIdx); err != nil {
   304  				return str, idx, err
   305  			}
   306  			if c != 'u' {
   307  				str = appendRune(str, r)
   308  				return readEscapedChar(c, buf, idx, str, lastIdx)
   309  			}
   310  			var r2 rune
   311  			if r2, idx, err = readU4(buf, idx, lastIdx); err != nil {
   312  				return str, idx, err
   313  			}
   314  			combined := utf16.DecodeRune(r, r2)
   315  			if combined == '\uFFFD' {
   316  				str = appendRune(str, r)
   317  				str = appendRune(str, r2)
   318  			} else {
   319  				str = appendRune(str, combined)
   320  			}
   321  		} else {
   322  			str = appendRune(str, r)
   323  		}
   324  	case '"':
   325  		str = append(str, '"')
   326  	case '\\':
   327  		str = append(str, '\\')
   328  	case '/':
   329  		str = append(str, '/')
   330  	case 'b':
   331  		str = append(str, '\b')
   332  	case 'f':
   333  		str = append(str, '\f')
   334  	case 'n':
   335  		str = append(str, '\n')
   336  	case 'r':
   337  		str = append(str, '\r')
   338  	case 't':
   339  		str = append(str, '\t')
   340  	default:
   341  		return str, idx, errors.New("invalid escape char after \\")
   342  	}
   343  	return str, idx, nil
   344  }
   345  
   346  // refer to json-iterator/go/stream_str writeStringSlowPath
   347  func wrapStrWithQuotation(s string, strBuilder *strings.Builder) {
   348  	strBuilder.WriteByte(DQuotation)
   349  	valLen := len(s)
   350  	i := 0
   351  	start := i
   352  	for i < valLen {
   353  		c := s[i]
   354  		if c < utf8.RuneSelf && htmlSafeSet[c] {
   355  			i++
   356  			continue
   357  		} else {
   358  			if b := s[i]; b < utf8.RuneSelf {
   359  				if start < i {
   360  					strBuilder.WriteString(s[start:i])
   361  				}
   362  				switch b {
   363  				case '\\', '"':
   364  					strBuilder.WriteByte('\\')
   365  					strBuilder.WriteByte(b)
   366  				case '\n':
   367  					strBuilder.WriteByte('\\')
   368  					strBuilder.WriteByte('n')
   369  				case '\r':
   370  					strBuilder.WriteByte('\\')
   371  					strBuilder.WriteByte('r')
   372  				case '\t':
   373  					strBuilder.WriteByte('\\')
   374  					strBuilder.WriteByte('t')
   375  				default:
   376  					// This encodes bytes < 0x20 except for \t, \n and \r.
   377  					// If escapeHTML is set, it also escapes <, >, and &
   378  					// because they can lead to security holes when
   379  					// user-controlled strings are rendered into JSON
   380  					// and served to some browsers.
   381  					strBuilder.WriteString(`\u00`)
   382  					strBuilder.WriteByte(hex[b>>4])
   383  					strBuilder.WriteByte(hex[b&0xF])
   384  				}
   385  				i++
   386  				start = i
   387  				continue
   388  			}
   389  			c, size := utf8.DecodeRuneInString(s[i:])
   390  			if c == utf8.RuneError && size == 1 {
   391  				if start < i {
   392  					strBuilder.WriteString(s[start:i])
   393  				}
   394  				strBuilder.WriteString(`\ufffd`)
   395  				i++
   396  				start = i
   397  				continue
   398  			}
   399  			// U+2028 is LINE SEPARATOR.
   400  			// U+2029 is PARAGRAPH SEPARATOR.
   401  			// They are both technically valid characters in JSON strings,
   402  			// but don't work in JSONP, which has to be evaluated as JavaScript,
   403  			// and can lead to security holes there. It is valid JSON to
   404  			// escape them, so we do so unconditionally.
   405  			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
   406  			if c == '\u2028' || c == '\u2029' {
   407  				if start < i {
   408  					strBuilder.WriteString(s[start:i])
   409  				}
   410  				strBuilder.WriteString(`\u202`)
   411  				strBuilder.WriteByte(hex[c&0xF])
   412  				i += size
   413  				start = i
   414  				continue
   415  			}
   416  			i += size
   417  		}
   418  	}
   419  	if start < valLen {
   420  		strBuilder.WriteString(s[start:])
   421  	}
   422  	strBuilder.WriteByte(DQuotation)
   423  }
   424  
   425  // refer to json-iterator/go/iter_str appendRune
   426  func appendRune(p []byte, r rune) []byte {
   427  	// Negative values are erroneous. Making it unsigned addresses the problem.
   428  	switch i := uint32(r); {
   429  	case i <= rune1Max:
   430  		p = append(p, byte(r))
   431  		return p
   432  	case i <= rune2Max:
   433  		p = append(p, t2|byte(r>>6))
   434  		p = append(p, tx|byte(r)&maskx)
   435  		return p
   436  	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
   437  		r = runeError
   438  		fallthrough
   439  	case i <= rune3Max:
   440  		p = append(p, t3|byte(r>>12))
   441  		p = append(p, tx|byte(r>>6)&maskx)
   442  		p = append(p, tx|byte(r)&maskx)
   443  		return p
   444  	default:
   445  		p = append(p, t4|byte(r>>18))
   446  		p = append(p, tx|byte(r>>12)&maskx)
   447  		p = append(p, tx|byte(r>>6)&maskx)
   448  		p = append(p, tx|byte(r)&maskx)
   449  		return p
   450  	}
   451  }
   452  
   453  var htmlSafeSet = [utf8.RuneSelf]bool{
   454  	' ':      true,
   455  	'!':      true,
   456  	'"':      false,
   457  	'#':      true,
   458  	'$':      true,
   459  	'%':      true,
   460  	'&':      false,
   461  	'\'':     true,
   462  	'(':      true,
   463  	')':      true,
   464  	'*':      true,
   465  	'+':      true,
   466  	',':      true,
   467  	'-':      true,
   468  	'.':      true,
   469  	'/':      true,
   470  	'0':      true,
   471  	'1':      true,
   472  	'2':      true,
   473  	'3':      true,
   474  	'4':      true,
   475  	'5':      true,
   476  	'6':      true,
   477  	'7':      true,
   478  	'8':      true,
   479  	'9':      true,
   480  	':':      true,
   481  	';':      true,
   482  	'<':      false,
   483  	'=':      true,
   484  	'>':      false,
   485  	'?':      true,
   486  	'@':      true,
   487  	'A':      true,
   488  	'B':      true,
   489  	'C':      true,
   490  	'D':      true,
   491  	'E':      true,
   492  	'F':      true,
   493  	'G':      true,
   494  	'H':      true,
   495  	'I':      true,
   496  	'J':      true,
   497  	'K':      true,
   498  	'L':      true,
   499  	'M':      true,
   500  	'N':      true,
   501  	'O':      true,
   502  	'P':      true,
   503  	'Q':      true,
   504  	'R':      true,
   505  	'S':      true,
   506  	'T':      true,
   507  	'U':      true,
   508  	'V':      true,
   509  	'W':      true,
   510  	'X':      true,
   511  	'Y':      true,
   512  	'Z':      true,
   513  	'[':      true,
   514  	'\\':     false,
   515  	']':      true,
   516  	'^':      true,
   517  	'_':      true,
   518  	'`':      true,
   519  	'a':      true,
   520  	'b':      true,
   521  	'c':      true,
   522  	'd':      true,
   523  	'e':      true,
   524  	'f':      true,
   525  	'g':      true,
   526  	'h':      true,
   527  	'i':      true,
   528  	'j':      true,
   529  	'k':      true,
   530  	'l':      true,
   531  	'm':      true,
   532  	'n':      true,
   533  	'o':      true,
   534  	'p':      true,
   535  	'q':      true,
   536  	'r':      true,
   537  	's':      true,
   538  	't':      true,
   539  	'u':      true,
   540  	'v':      true,
   541  	'w':      true,
   542  	'x':      true,
   543  	'y':      true,
   544  	'z':      true,
   545  	'{':      true,
   546  	'|':      true,
   547  	'}':      true,
   548  	'~':      true,
   549  	'\u007f': true,
   550  }