github.com/XiaoMi/Gaea@v1.2.5/parser/tidb-types/json/binary.go (about) 1 // Copyright 2017 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package json 15 16 import ( 17 "bytes" 18 "encoding/binary" 19 "encoding/json" 20 "fmt" 21 "math" 22 "reflect" 23 "sort" 24 "strconv" 25 "strings" 26 "unicode/utf8" 27 28 "github.com/pingcap/errors" 29 30 "github.com/XiaoMi/Gaea/parser/terror" 31 "github.com/XiaoMi/Gaea/util/hack" 32 ) 33 34 /* 35 The binary JSON format from MySQL 5.7 is as follows: 36 37 JSON doc ::= type value 38 type ::= 39 0x01 | // large JSON object 40 0x03 | // large JSON array 41 0x04 | // literal (true/false/null) 42 0x05 | // int16 43 0x06 | // uint16 44 0x07 | // int32 45 0x08 | // uint32 46 0x09 | // int64 47 0x0a | // uint64 48 0x0b | // double 49 0x0c | // utf8mb4 string 50 51 value ::= 52 object | 53 array | 54 literal | 55 number | 56 string | 57 58 object ::= element-count size key-entry* value-entry* key* value* 59 60 array ::= element-count size value-entry* value* 61 62 // number of members in object or number of elements in array 63 element-count ::= uint32 64 65 // number of bytes in the binary representation of the object or array 66 size ::= uint32 67 68 key-entry ::= key-offset key-length 69 70 key-offset ::= uint32 71 72 key-length ::= uint16 // key length must be less than 64KB 73 74 value-entry ::= type offset-or-inlined-value 75 76 // This field holds either the offset to where the value is stored, 77 // or the value itself if it is small enough to be inlined (that is, 78 // if it is a JSON literal or a small enough [u]int). 79 offset-or-inlined-value ::= uint32 80 81 key ::= utf8mb4-data 82 83 literal ::= 84 0x00 | // JSON null literal 85 0x01 | // JSON true literal 86 0x02 | // JSON false literal 87 88 number ::= .... // little-endian format for [u]int(16|32|64), whereas 89 // double is stored in a platform-independent, eight-byte 90 // format using float8store() 91 92 string ::= data-length utf8mb4-data 93 94 data-length ::= uint8* // If the high bit of a byte is 1, the length 95 // field is continued in the next byte, 96 // otherwise it is the last byte of the length 97 // field. So we need 1 byte to represent 98 // lengths up to 127, 2 bytes to represent 99 // lengths up to 16383, and so on... 100 */ 101 102 // BinaryJSON represents a binary encoded JSON object. 103 // It can be randomly accessed without deserialization. 104 type BinaryJSON struct { 105 TypeCode TypeCode 106 Value []byte 107 } 108 109 // String implements fmt.Stringer interface. 110 func (bj BinaryJSON) String() string { 111 out, err := bj.MarshalJSON() 112 terror.Log(err) 113 return string(out) 114 } 115 116 // Copy makes a copy of the BinaryJSON 117 func (bj BinaryJSON) Copy() BinaryJSON { 118 buf := make([]byte, len(bj.Value)) 119 copy(buf, bj.Value) 120 return BinaryJSON{TypeCode: bj.TypeCode, Value: buf} 121 } 122 123 // MarshalJSON implements the json.Marshaler interface. 124 func (bj BinaryJSON) MarshalJSON() ([]byte, error) { 125 buf := make([]byte, 0, len(bj.Value)*3/2) 126 return bj.marshalTo(buf) 127 } 128 129 func (bj BinaryJSON) marshalTo(buf []byte) ([]byte, error) { 130 switch bj.TypeCode { 131 case TypeCodeString: 132 return marshalStringTo(buf, bj.GetString()), nil 133 case TypeCodeLiteral: 134 return marshalLiteralTo(buf, bj.Value[0]), nil 135 case TypeCodeInt64: 136 return strconv.AppendInt(buf, bj.GetInt64(), 10), nil 137 case TypeCodeUint64: 138 return strconv.AppendUint(buf, bj.GetUint64(), 10), nil 139 case TypeCodeFloat64: 140 return bj.marshalFloat64To(buf) 141 case TypeCodeArray: 142 return bj.marshalArrayTo(buf) 143 case TypeCodeObject: 144 return bj.marshalObjTo(buf) 145 } 146 return buf, nil 147 } 148 149 // GetInt64 gets the int64 value. 150 func (bj BinaryJSON) GetInt64() int64 { 151 return int64(endian.Uint64(bj.Value)) 152 } 153 154 // GetUint64 gets the uint64 value. 155 func (bj BinaryJSON) GetUint64() uint64 { 156 return endian.Uint64(bj.Value) 157 } 158 159 // GetFloat64 gets the float64 value. 160 func (bj BinaryJSON) GetFloat64() float64 { 161 return math.Float64frombits(bj.GetUint64()) 162 } 163 164 // GetString gets the string value. 165 func (bj BinaryJSON) GetString() []byte { 166 strLen, lenLen := uint64(bj.Value[0]), 1 167 if strLen >= utf8.RuneSelf { 168 strLen, lenLen = binary.Uvarint(bj.Value) 169 } 170 return bj.Value[lenLen : lenLen+int(strLen)] 171 } 172 173 // GetKeys gets the keys of the object 174 func (bj BinaryJSON) GetKeys() BinaryJSON { 175 count := bj.GetElemCount() 176 ret := make([]BinaryJSON, 0, count) 177 for i := 0; i < count; i++ { 178 ret = append(ret, CreateBinary(string(bj.objectGetKey(i)))) 179 } 180 return buildBinaryArray(ret) 181 } 182 183 // GetElemCount gets the count of Object or Array. 184 func (bj BinaryJSON) GetElemCount() int { 185 return int(endian.Uint32(bj.Value)) 186 } 187 188 func (bj BinaryJSON) arrayGetElem(idx int) BinaryJSON { 189 return bj.valEntryGet(headerSize + idx*valEntrySize) 190 } 191 192 func (bj BinaryJSON) objectGetKey(i int) []byte { 193 keyOff := int(endian.Uint32(bj.Value[headerSize+i*keyEntrySize:])) 194 keyLen := int(endian.Uint16(bj.Value[headerSize+i*keyEntrySize+keyLenOff:])) 195 return bj.Value[keyOff : keyOff+keyLen] 196 } 197 198 func (bj BinaryJSON) objectGetVal(i int) BinaryJSON { 199 elemCount := bj.GetElemCount() 200 return bj.valEntryGet(headerSize + elemCount*keyEntrySize + i*valEntrySize) 201 } 202 203 func (bj BinaryJSON) valEntryGet(valEntryOff int) BinaryJSON { 204 tpCode := bj.Value[valEntryOff] 205 valOff := endian.Uint32(bj.Value[valEntryOff+valTypeSize:]) 206 switch tpCode { 207 case TypeCodeLiteral: 208 return BinaryJSON{TypeCode: TypeCodeLiteral, Value: bj.Value[valEntryOff+valTypeSize : valEntryOff+valTypeSize+1]} 209 case TypeCodeUint64, TypeCodeInt64, TypeCodeFloat64: 210 return BinaryJSON{TypeCode: tpCode, Value: bj.Value[valOff : valOff+8]} 211 case TypeCodeString: 212 strLen, lenLen := uint64(bj.Value[valOff]), 1 213 if strLen >= utf8.RuneSelf { 214 strLen, lenLen = binary.Uvarint(bj.Value[valOff:]) 215 } 216 totalLen := uint32(lenLen) + uint32(strLen) 217 return BinaryJSON{TypeCode: tpCode, Value: bj.Value[valOff : valOff+totalLen]} 218 } 219 dataSize := endian.Uint32(bj.Value[valOff+dataSizeOff:]) 220 return BinaryJSON{TypeCode: tpCode, Value: bj.Value[valOff : valOff+dataSize]} 221 } 222 223 func (bj BinaryJSON) marshalFloat64To(buf []byte) ([]byte, error) { 224 // NOTE: copied from Go standard library. 225 f := bj.GetFloat64() 226 if math.IsInf(f, 0) || math.IsNaN(f) { 227 return buf, &json.UnsupportedValueError{Str: strconv.FormatFloat(f, 'g', -1, 64)} 228 } 229 230 // Convert as if by ES6 number to string conversion. 231 // This matches most other JSON generators. 232 // See golang.org/issue/6384 and golang.org/issue/14135. 233 // Like fmt %g, but the exponent cutoffs are different 234 // and exponents themselves are not padded to two digits. 235 abs := math.Abs(f) 236 ffmt := byte('f') 237 // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. 238 if abs != 0 { 239 if abs < 1e-6 || abs >= 1e21 { 240 ffmt = 'e' 241 } 242 } 243 buf = strconv.AppendFloat(buf, f, ffmt, -1, 64) 244 if ffmt == 'e' { 245 // clean up e-09 to e-9 246 n := len(buf) 247 if n >= 4 && buf[n-4] == 'e' && buf[n-3] == '-' && buf[n-2] == '0' { 248 buf[n-2] = buf[n-1] 249 buf = buf[:n-1] 250 } 251 } 252 return buf, nil 253 } 254 255 func (bj BinaryJSON) marshalArrayTo(buf []byte) ([]byte, error) { 256 elemCount := int(endian.Uint32(bj.Value)) 257 buf = append(buf, '[') 258 for i := 0; i < elemCount; i++ { 259 if i != 0 { 260 buf = append(buf, ", "...) 261 } 262 var err error 263 buf, err = bj.arrayGetElem(i).marshalTo(buf) 264 if err != nil { 265 return nil, errors.Trace(err) 266 } 267 } 268 return append(buf, ']'), nil 269 } 270 271 func (bj BinaryJSON) marshalObjTo(buf []byte) ([]byte, error) { 272 elemCount := int(endian.Uint32(bj.Value)) 273 buf = append(buf, '{') 274 for i := 0; i < elemCount; i++ { 275 if i != 0 { 276 buf = append(buf, ", "...) 277 } 278 buf = marshalStringTo(buf, bj.objectGetKey(i)) 279 buf = append(buf, ": "...) 280 var err error 281 buf, err = bj.objectGetVal(i).marshalTo(buf) 282 if err != nil { 283 return nil, errors.Trace(err) 284 } 285 } 286 return append(buf, '}'), nil 287 } 288 289 func marshalStringTo(buf, s []byte) []byte { 290 // NOTE: copied from Go standard library. 291 // NOTE: keep in sync with string above. 292 buf = append(buf, '"') 293 start := 0 294 for i := 0; i < len(s); { 295 if b := s[i]; b < utf8.RuneSelf { 296 if htmlSafeSet[b] { 297 i++ 298 continue 299 } 300 if start < i { 301 buf = append(buf, s[start:i]...) 302 } 303 switch b { 304 case '\\', '"': 305 buf = append(buf, '\\', b) 306 case '\n': 307 buf = append(buf, '\\', 'n') 308 case '\r': 309 buf = append(buf, '\\', 'r') 310 case '\t': 311 buf = append(buf, '\\', 't') 312 default: 313 // This encodes bytes < 0x20 except for \t, \n and \r. 314 // If escapeHTML is set, it also escapes <, >, and & 315 // because they can lead to security holes when 316 // user-controlled strings are rendered into JSON 317 // and served to some browsers. 318 buf = append(buf, `\u00`...) 319 buf = append(buf, hexChars[b>>4], hexChars[b&0xF]) 320 } 321 i++ 322 start = i 323 continue 324 } 325 c, size := utf8.DecodeRune(s[i:]) 326 if c == utf8.RuneError && size == 1 { 327 if start < i { 328 buf = append(buf, s[start:i]...) 329 } 330 buf = append(buf, `\ufffd`...) 331 i += size 332 start = i 333 continue 334 } 335 // U+2028 is LINE SEPARATOR. 336 // U+2029 is PARAGRAPH SEPARATOR. 337 // They are both technically valid characters in JSON strings, 338 // but don't work in JSONP, which has to be evaluated as JavaScript, 339 // and can lead to security holes there. It is valid JSON to 340 // escape them, so we do so unconditionally. 341 // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. 342 if c == '\u2028' || c == '\u2029' { 343 if start < i { 344 buf = append(buf, s[start:i]...) 345 } 346 buf = append(buf, `\u202`...) 347 buf = append(buf, hexChars[c&0xF]) 348 i += size 349 start = i 350 continue 351 } 352 i += size 353 } 354 if start < len(s) { 355 buf = append(buf, s[start:]...) 356 } 357 buf = append(buf, '"') 358 return buf 359 } 360 361 func (bj BinaryJSON) marshalValueEntryTo(buf []byte, entryOff int) ([]byte, error) { 362 tpCode := bj.Value[entryOff] 363 switch tpCode { 364 case TypeCodeLiteral: 365 buf = marshalLiteralTo(buf, bj.Value[entryOff+1]) 366 default: 367 offset := endian.Uint32(bj.Value[entryOff+1:]) 368 tmp := BinaryJSON{TypeCode: tpCode, Value: bj.Value[offset:]} 369 var err error 370 buf, err = tmp.marshalTo(buf) 371 if err != nil { 372 return nil, errors.Trace(err) 373 } 374 } 375 return buf, nil 376 } 377 378 func marshalLiteralTo(b []byte, litType byte) []byte { 379 switch litType { 380 case LiteralFalse: 381 return append(b, "false"...) 382 case LiteralTrue: 383 return append(b, "true"...) 384 case LiteralNil: 385 return append(b, "null"...) 386 } 387 return b 388 } 389 390 // ParseBinaryFromString parses a json from string. 391 func ParseBinaryFromString(s string) (bj BinaryJSON, err error) { 392 if len(s) == 0 { 393 err = ErrInvalidJSONText.GenWithStackByArgs("The document is empty") 394 return 395 } 396 if err = bj.UnmarshalJSON(hack.Slice(s)); err != nil { 397 err = ErrInvalidJSONText.GenWithStackByArgs(err) 398 } 399 return 400 } 401 402 // UnmarshalJSON implements the json.Unmarshaler interface. 403 func (bj *BinaryJSON) UnmarshalJSON(data []byte) error { 404 var decoder = json.NewDecoder(bytes.NewReader(data)) 405 decoder.UseNumber() 406 var in interface{} 407 err := decoder.Decode(&in) 408 if err != nil { 409 return errors.Trace(err) 410 } 411 buf := make([]byte, 0, len(data)) 412 var typeCode TypeCode 413 typeCode, buf, err = appendBinary(buf, in) 414 if err != nil { 415 return errors.Trace(err) 416 } 417 bj.TypeCode = typeCode 418 bj.Value = buf 419 return nil 420 } 421 422 // CreateBinary creates a BinaryJSON from interface. 423 func CreateBinary(in interface{}) BinaryJSON { 424 typeCode, buf, err := appendBinary(nil, in) 425 if err != nil { 426 panic(err) 427 } 428 return BinaryJSON{TypeCode: typeCode, Value: buf} 429 } 430 431 func appendBinary(buf []byte, in interface{}) (TypeCode, []byte, error) { 432 var typeCode byte 433 var err error 434 switch x := in.(type) { 435 case nil: 436 typeCode = TypeCodeLiteral 437 buf = append(buf, LiteralNil) 438 case bool: 439 typeCode = TypeCodeLiteral 440 if x { 441 buf = append(buf, LiteralTrue) 442 } else { 443 buf = append(buf, LiteralFalse) 444 } 445 case int64: 446 typeCode = TypeCodeInt64 447 buf = appendBinaryUint64(buf, uint64(x)) 448 case uint64: 449 typeCode = TypeCodeUint64 450 buf = appendBinaryUint64(buf, x) 451 case float64: 452 typeCode = TypeCodeFloat64 453 buf = appendBinaryFloat64(buf, x) 454 case json.Number: 455 typeCode, buf, err = appendBinaryNumber(buf, x) 456 if err != nil { 457 return typeCode, nil, errors.Trace(err) 458 } 459 case string: 460 typeCode = TypeCodeString 461 buf = appendBinaryString(buf, x) 462 case BinaryJSON: 463 typeCode = x.TypeCode 464 buf = append(buf, x.Value...) 465 case []interface{}: 466 typeCode = TypeCodeArray 467 buf, err = appendBinaryArray(buf, x) 468 if err != nil { 469 return typeCode, nil, errors.Trace(err) 470 } 471 case map[string]interface{}: 472 typeCode = TypeCodeObject 473 buf, err = appendBinaryObject(buf, x) 474 if err != nil { 475 return typeCode, nil, errors.Trace(err) 476 } 477 default: 478 msg := fmt.Sprintf(unknownTypeErrorMsg, reflect.TypeOf(in)) 479 err = errors.New(msg) 480 } 481 return typeCode, buf, err 482 } 483 484 func appendZero(buf []byte, length int) []byte { 485 var tmp [8]byte 486 rem := length % 8 487 loop := length / 8 488 for i := 0; i < loop; i++ { 489 buf = append(buf, tmp[:]...) 490 } 491 for i := 0; i < rem; i++ { 492 buf = append(buf, 0) 493 } 494 return buf 495 } 496 497 func appendUint32(buf []byte, v uint32) []byte { 498 var tmp [4]byte 499 endian.PutUint32(tmp[:], v) 500 return append(buf, tmp[:]...) 501 } 502 503 func appendBinaryNumber(buf []byte, x json.Number) (TypeCode, []byte, error) { 504 var typeCode TypeCode 505 if strings.ContainsAny(string(x), "Ee.") { 506 typeCode = TypeCodeFloat64 507 f64, err := x.Float64() 508 if err != nil { 509 return typeCode, nil, errors.Trace(err) 510 } 511 buf = appendBinaryFloat64(buf, f64) 512 } else { 513 typeCode = TypeCodeInt64 514 i64, err := x.Int64() 515 if err != nil { 516 typeCode = TypeCodeFloat64 517 f64, err := x.Float64() 518 if err != nil { 519 return typeCode, nil, errors.Trace(err) 520 } 521 buf = appendBinaryFloat64(buf, f64) 522 } else { 523 buf = appendBinaryUint64(buf, uint64(i64)) 524 } 525 } 526 return typeCode, buf, nil 527 } 528 529 func appendBinaryString(buf []byte, v string) []byte { 530 begin := len(buf) 531 buf = appendZero(buf, binary.MaxVarintLen64) 532 lenLen := binary.PutUvarint(buf[begin:], uint64(len(v))) 533 buf = buf[:len(buf)-binary.MaxVarintLen64+lenLen] 534 buf = append(buf, v...) 535 return buf 536 } 537 538 func appendBinaryFloat64(buf []byte, v float64) []byte { 539 off := len(buf) 540 buf = appendZero(buf, 8) 541 endian.PutUint64(buf[off:], math.Float64bits(v)) 542 return buf 543 } 544 545 func appendBinaryUint64(buf []byte, v uint64) []byte { 546 off := len(buf) 547 buf = appendZero(buf, 8) 548 endian.PutUint64(buf[off:], v) 549 return buf 550 } 551 552 func appendBinaryArray(buf []byte, array []interface{}) ([]byte, error) { 553 docOff := len(buf) 554 buf = appendUint32(buf, uint32(len(array))) 555 buf = appendZero(buf, dataSizeOff) 556 valEntryBegin := len(buf) 557 buf = appendZero(buf, len(array)*valEntrySize) 558 for i, val := range array { 559 var err error 560 buf, err = appendBinaryValElem(buf, docOff, valEntryBegin+i*valEntrySize, val) 561 if err != nil { 562 return nil, errors.Trace(err) 563 } 564 } 565 docSize := len(buf) - docOff 566 endian.PutUint32(buf[docOff+dataSizeOff:], uint32(docSize)) 567 return buf, nil 568 } 569 570 func appendBinaryValElem(buf []byte, docOff, valEntryOff int, val interface{}) ([]byte, error) { 571 var typeCode TypeCode 572 var err error 573 elemDocOff := len(buf) 574 typeCode, buf, err = appendBinary(buf, val) 575 if err != nil { 576 return nil, errors.Trace(err) 577 } 578 switch typeCode { 579 case TypeCodeLiteral: 580 litCode := buf[elemDocOff] 581 buf = buf[:elemDocOff] 582 buf[valEntryOff] = TypeCodeLiteral 583 buf[valEntryOff+1] = litCode 584 return buf, nil 585 } 586 buf[valEntryOff] = typeCode 587 valOff := elemDocOff - docOff 588 endian.PutUint32(buf[valEntryOff+1:], uint32(valOff)) 589 return buf, nil 590 } 591 592 type field struct { 593 key string 594 val interface{} 595 } 596 597 func appendBinaryObject(buf []byte, x map[string]interface{}) ([]byte, error) { 598 docOff := len(buf) 599 buf = appendUint32(buf, uint32(len(x))) 600 buf = appendZero(buf, dataSizeOff) 601 keyEntryBegin := len(buf) 602 buf = appendZero(buf, len(x)*keyEntrySize) 603 valEntryBegin := len(buf) 604 buf = appendZero(buf, len(x)*valEntrySize) 605 606 fields := make([]field, 0, len(x)) 607 for key, val := range x { 608 fields = append(fields, field{key: key, val: val}) 609 } 610 sort.Slice(fields, func(i, j int) bool { 611 return fields[i].key < fields[j].key 612 }) 613 for i, field := range fields { 614 keyEntryOff := keyEntryBegin + i*keyEntrySize 615 keyOff := len(buf) - docOff 616 keyLen := uint32(len(field.key)) 617 endian.PutUint32(buf[keyEntryOff:], uint32(keyOff)) 618 endian.PutUint16(buf[keyEntryOff+keyLenOff:], uint16(keyLen)) 619 buf = append(buf, field.key...) 620 } 621 for i, field := range fields { 622 var err error 623 buf, err = appendBinaryValElem(buf, docOff, valEntryBegin+i*valEntrySize, field.val) 624 if err != nil { 625 return nil, errors.Trace(err) 626 } 627 } 628 docSize := len(buf) - docOff 629 endian.PutUint32(buf[docOff+dataSizeOff:], uint32(docSize)) 630 return buf, nil 631 }