github.com/dolthub/go-mysql-server@v0.18.0/sql/types/strings.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package types 16 17 import ( 18 "encoding/json" 19 "fmt" 20 "reflect" 21 "strconv" 22 strings2 "strings" 23 "time" 24 "unicode/utf8" 25 26 "github.com/dolthub/vitess/go/sqltypes" 27 "github.com/dolthub/vitess/go/vt/proto/query" 28 "github.com/shopspring/decimal" 29 "gopkg.in/src-d/go-errors.v1" 30 31 "github.com/dolthub/go-mysql-server/internal/strings" 32 "github.com/dolthub/go-mysql-server/sql" 33 "github.com/dolthub/go-mysql-server/sql/encodings" 34 ) 35 36 const ( 37 charBinaryMax = 255 38 varcharVarbinaryMax = 65_535 39 MaxRowLength = 65_535 40 41 TinyTextBlobMax = charBinaryMax 42 TextBlobMax = varcharVarbinaryMax 43 MediumTextBlobMax = 16_777_215 44 LongTextBlobMax = int64(4_294_967_295) 45 ) 46 47 var ( 48 // ErrLengthTooLarge is thrown when a string's length is too large given the other parameters. 49 ErrLengthTooLarge = errors.NewKind("length is %v but max allowed is %v") 50 ErrLengthBeyondLimit = errors.NewKind("string '%v' is too large for column '%v'") 51 ErrBinaryCollation = errors.NewKind("binary types must have the binary collation: %v") 52 53 TinyText = MustCreateStringWithDefaults(sqltypes.Text, TinyTextBlobMax) 54 Text = MustCreateStringWithDefaults(sqltypes.Text, TextBlobMax) 55 MediumText = MustCreateStringWithDefaults(sqltypes.Text, MediumTextBlobMax) 56 LongText = MustCreateStringWithDefaults(sqltypes.Text, LongTextBlobMax) 57 TinyBlob = MustCreateBinary(sqltypes.Blob, TinyTextBlobMax) 58 Blob = MustCreateBinary(sqltypes.Blob, TextBlobMax) 59 MediumBlob = MustCreateBinary(sqltypes.Blob, MediumTextBlobMax) 60 LongBlob = MustCreateBinary(sqltypes.Blob, LongTextBlobMax) 61 62 stringValueType = reflect.TypeOf(string("")) 63 byteValueType = reflect.TypeOf(([]byte)(nil)) 64 ) 65 66 type StringType struct { 67 baseType query.Type 68 maxCharLength int64 69 maxByteLength int64 70 collation sql.CollationID 71 } 72 73 var _ sql.StringType = StringType{} 74 var _ sql.TypeWithCollation = StringType{} 75 var _ sql.CollationCoercible = StringType{} 76 77 // CreateString creates a new StringType based on the specified type, length, and collation. Length is interpreted as 78 // the length of bytes in the new StringType for SQL types that are based on bytes (i.e. TEXT, BLOB, BINARY, and 79 // VARBINARY). For all other char-based SQL types, length is interpreted as the length of chars in the new 80 // StringType (i.e. CHAR, and VARCHAR). 81 func CreateString(baseType query.Type, length int64, collation sql.CollationID) (sql.StringType, error) { 82 // TODO: remove character set and collation validity checks once all collations have been implemented (delete errors as well) 83 if collation.CharacterSet().Encoder() == nil { 84 return nil, sql.ErrCharSetNotYetImplementedTemp.New(collation.CharacterSet().Name()) 85 } else if collation.Sorter() == nil { 86 return nil, sql.ErrCollationNotYetImplementedTemp.New(collation.Name()) 87 } 88 89 // Check the base type first and fail immediately if it's unknown 90 switch baseType { 91 case sqltypes.Char, sqltypes.Binary, sqltypes.VarChar, sqltypes.VarBinary, sqltypes.Text, sqltypes.Blob: 92 default: 93 return nil, sql.ErrInvalidBaseType.New(baseType.String(), "string") 94 } 95 96 // We accept a length of zero, but a negative length is not valid 97 if length < 0 { 98 return nil, fmt.Errorf("length of %v is less than the minimum of 0", length) 99 } 100 101 switch baseType { 102 case sqltypes.Binary, sqltypes.VarBinary, sqltypes.Blob: 103 if collation != sql.Collation_binary { 104 return nil, ErrBinaryCollation.New(collation.Name()) 105 } 106 } 107 108 // If the CharacterSet is binary, then we convert the type to the binary equivalent 109 if collation.Equals(sql.Collation_binary) { 110 switch baseType { 111 case sqltypes.Char: 112 baseType = sqltypes.Binary 113 case sqltypes.VarChar: 114 baseType = sqltypes.VarBinary 115 case sqltypes.Text: 116 baseType = sqltypes.Blob 117 } 118 } 119 120 // Determine the max byte length and max char length based on whether the base type is byte-based or char-based 121 charsetMaxLength := collation.CharacterSet().MaxLength() 122 maxCharLength := length 123 maxByteLength := length 124 switch baseType { 125 case sqltypes.Char, sqltypes.VarChar: 126 maxByteLength = length * charsetMaxLength 127 case sqltypes.Binary, sqltypes.VarBinary, sqltypes.Text, sqltypes.Blob: 128 maxCharLength = length / charsetMaxLength 129 } 130 131 // Make sure that length is valid depending on the base type, since they each handle lengths differently 132 switch baseType { 133 case sqltypes.Char: 134 if maxCharLength > charBinaryMax { 135 return nil, ErrLengthTooLarge.New(length, charBinaryMax) 136 } 137 case sqltypes.VarChar: 138 if maxCharLength > varcharVarbinaryMax { 139 return nil, ErrLengthTooLarge.New(length, varcharVarbinaryMax/charsetMaxLength) 140 } 141 case sqltypes.Binary: 142 if maxByteLength > charBinaryMax { 143 return nil, ErrLengthTooLarge.New(length, charBinaryMax) 144 } 145 case sqltypes.VarBinary: 146 // VarBinary fields transmitted over the wire could be for a VarBinary field, 147 // or a JSON field, so we validate against JSON's larger limit (1GB) 148 // instead of VarBinary's smaller limit (65k). 149 if maxByteLength > MaxJsonFieldByteLength { 150 return nil, ErrLengthTooLarge.New(length, MaxJsonFieldByteLength/charsetMaxLength) 151 } 152 case sqltypes.Text, sqltypes.Blob: 153 if maxByteLength > LongTextBlobMax { 154 return nil, ErrLengthTooLarge.New(length, LongTextBlobMax) 155 } 156 if maxByteLength <= TinyTextBlobMax { 157 maxByteLength = TinyTextBlobMax 158 maxCharLength = TinyTextBlobMax / charsetMaxLength 159 } else if maxByteLength <= TextBlobMax { 160 maxByteLength = TextBlobMax 161 maxCharLength = TextBlobMax / charsetMaxLength 162 } else if maxByteLength <= MediumTextBlobMax { 163 maxByteLength = MediumTextBlobMax 164 maxCharLength = MediumTextBlobMax / charsetMaxLength 165 } else { 166 maxByteLength = LongTextBlobMax 167 maxCharLength = LongTextBlobMax / charsetMaxLength 168 } 169 } 170 171 return StringType{baseType, maxCharLength, maxByteLength, collation}, nil 172 } 173 174 // MustCreateString is the same as CreateString except it panics on errors. 175 func MustCreateString(baseType query.Type, length int64, collation sql.CollationID) sql.StringType { 176 st, err := CreateString(baseType, length, collation) 177 if err != nil { 178 panic(err) 179 } 180 return st 181 } 182 183 // CreateStringWithDefaults creates a StringType with the default character set and collation of the given size. 184 func CreateStringWithDefaults(baseType query.Type, length int64) (sql.StringType, error) { 185 return CreateString(baseType, length, sql.Collation_Default) 186 } 187 188 // MustCreateStringWithDefaults creates a StringType with the default CharacterSet and Collation. 189 func MustCreateStringWithDefaults(baseType query.Type, length int64) sql.StringType { 190 return MustCreateString(baseType, length, sql.Collation_Default) 191 } 192 193 // CreateBinary creates a StringType with a binary collation and character set of the given size. 194 func CreateBinary(baseType query.Type, lengthHint int64) (sql.StringType, error) { 195 return CreateString(baseType, lengthHint, sql.Collation_binary) 196 } 197 198 // MustCreateBinary is the same as CreateBinary except it panics on errors. 199 func MustCreateBinary(baseType query.Type, lengthHint int64) sql.StringType { 200 return MustCreateString(baseType, lengthHint, sql.Collation_binary) 201 } 202 203 // CreateTinyText creates a TINYTEXT with the given collation. 204 func CreateTinyText(collation sql.CollationID) sql.StringType { 205 return MustCreateString(sqltypes.Text, TinyTextBlobMax/collation.CharacterSet().MaxLength(), collation) 206 } 207 208 // CreateText creates a TEXT with the given collation. 209 func CreateText(collation sql.CollationID) sql.StringType { 210 return MustCreateString(sqltypes.Text, TextBlobMax/collation.CharacterSet().MaxLength(), collation) 211 } 212 213 // CreateMediumText creates a MEDIUMTEXT with the given collation. 214 func CreateMediumText(collation sql.CollationID) sql.StringType { 215 return MustCreateString(sqltypes.Text, MediumTextBlobMax/collation.CharacterSet().MaxLength(), collation) 216 } 217 218 // CreateLongText creates a LONGTEXT with the given collation. 219 func CreateLongText(collation sql.CollationID) sql.StringType { 220 return MustCreateString(sqltypes.Text, LongTextBlobMax/collation.CharacterSet().MaxLength(), collation) 221 } 222 223 // MaxTextResponseByteLength implements the Type interface 224 func (t StringType) MaxTextResponseByteLength(ctx *sql.Context) uint32 { 225 // For TEXT types, MySQL returns the maxByteLength multiplied by the size of the largest 226 // multibyte character in the associated charset for the maximum field bytes in the response 227 // metadata. 228 // The one exception is LongText types, which cannot be multiplied by a multibyte char multiplier, 229 // since the max bytes field in a column definition response over the wire is a uint32 and multiplying 230 // longTextBlobMax by anything over 1 would cause it to overflow. 231 if t.baseType == sqltypes.Text && t.maxByteLength != LongTextBlobMax { 232 characterSetResults := ctx.GetCharacterSetResults() 233 charsetMaxLength := uint32(characterSetResults.MaxLength()) 234 return uint32(t.maxByteLength) * charsetMaxLength 235 } else { 236 return uint32(t.maxByteLength) 237 } 238 } 239 240 func (t StringType) Length() int64 { 241 return t.maxCharLength 242 } 243 244 // Compare implements Type interface. 245 func (t StringType) Compare(a interface{}, b interface{}) (int, error) { 246 if hasNulls, res := CompareNulls(a, b); hasNulls { 247 return res, nil 248 } 249 250 var as string 251 var bs string 252 var ok bool 253 if as, ok = a.(string); !ok { 254 ai, _, err := t.Convert(a) 255 if err != nil { 256 return 0, err 257 } 258 if IsBinaryType(t) { 259 as = encodings.BytesToString(ai.([]byte)) 260 } else { 261 as = ai.(string) 262 } 263 } 264 if bs, ok = b.(string); !ok { 265 bi, _, err := t.Convert(b) 266 if err != nil { 267 return 0, err 268 } 269 if IsBinaryType(t) { 270 bs = encodings.BytesToString(bi.([]byte)) 271 } else { 272 bs = bi.(string) 273 } 274 } 275 276 encoder := t.collation.CharacterSet().Encoder() 277 getRuneWeight := t.collation.Sorter() 278 for len(as) > 0 && len(bs) > 0 { 279 ar, aRead := encoder.NextRune(as) 280 br, bRead := encoder.NextRune(bs) 281 if aRead == 0 || bRead == 0 || aRead == utf8.RuneError || bRead == utf8.RuneError { 282 // TODO: return a real error 283 return 0, fmt.Errorf("malformed string encountered while comparing") 284 } 285 aWeight := getRuneWeight(ar) 286 bWeight := getRuneWeight(br) 287 if aWeight < bWeight { 288 return -1, nil 289 } else if aWeight > bWeight { 290 return 1, nil 291 } 292 as = as[aRead:] 293 bs = bs[bRead:] 294 } 295 296 // Strings are equal up to the compared length, so shorter strings sort before longer strings 297 if len(as) < len(bs) { 298 return -1, nil 299 } else if len(as) > len(bs) { 300 return 1, nil 301 } else { 302 return 0, nil 303 } 304 } 305 306 // Convert implements Type interface. 307 func (t StringType) Convert(v interface{}) (interface{}, sql.ConvertInRange, error) { 308 if v == nil { 309 return nil, sql.InRange, nil 310 } 311 312 val, err := ConvertToString(v, t) 313 if err != nil { 314 return nil, sql.OutOfRange, err 315 } 316 317 if IsBinaryType(t) { 318 return []byte(val), sql.InRange, nil 319 } 320 return val, sql.InRange, nil 321 } 322 323 func ConvertToString(v interface{}, t sql.StringType) (string, error) { 324 var val string 325 switch s := v.(type) { 326 case bool: 327 if s { 328 val = "1" 329 } else { 330 val = "0" 331 } 332 case float64: 333 val = strconv.FormatFloat(s, 'f', -1, 64) 334 if val == "-0" { 335 val = "0" 336 } 337 case float32: 338 val = strconv.FormatFloat(float64(s), 'f', -1, 32) 339 if val == "-0" { 340 val = "0" 341 } 342 case int: 343 val = strconv.FormatInt(int64(s), 10) 344 case int8: 345 val = strconv.FormatInt(int64(s), 10) 346 case int16: 347 val = strconv.FormatInt(int64(s), 10) 348 case int32: 349 val = strconv.FormatInt(int64(s), 10) 350 case int64: 351 val = strconv.FormatInt(s, 10) 352 case uint: 353 val = strconv.FormatUint(uint64(s), 10) 354 case uint8: 355 val = strconv.FormatUint(uint64(s), 10) 356 case uint16: 357 val = strconv.FormatUint(uint64(s), 10) 358 case uint32: 359 val = strconv.FormatUint(uint64(s), 10) 360 case uint64: 361 val = strconv.FormatUint(s, 10) 362 case string: 363 val = s 364 case []byte: 365 val = string(s) 366 case time.Time: 367 val = s.Format(sql.TimestampDatetimeLayout) 368 case decimal.Decimal: 369 val = s.StringFixed(s.Exponent() * -1) 370 case decimal.NullDecimal: 371 if !s.Valid { 372 return "", nil 373 } 374 val = s.Decimal.String() 375 376 case JSONStringer: 377 var err error 378 val, err = s.JSONString() 379 if err != nil { 380 return "", err 381 } 382 val, err = strings.Unquote(val) 383 if err != nil { 384 return "", err 385 } 386 case sql.JSONWrapper: 387 jsonInterface := s.ToInterface() 388 jsonBytes, err := json.Marshal(jsonInterface) 389 if err != nil { 390 return "", err 391 } 392 val, err = strings.Unquote(string(jsonBytes)) 393 if err != nil { 394 return "", err 395 } 396 case GeometryValue: 397 return string(s.Serialize()), nil 398 default: 399 return "", sql.ErrConvertToSQL.New(s, t) 400 } 401 402 s := t.(StringType) 403 if s.baseType == sqltypes.Text { 404 // for TEXT types, we use the byte length instead of the character length 405 if int64(len(val)) > s.maxByteLength { 406 return "", ErrLengthBeyondLimit.New(val, t.String()) 407 } 408 } else { 409 if t.CharacterSet().MaxLength() == 1 { 410 // if the character set only has a max size of 1, we can just count the bytes 411 if int64(len(val)) > s.maxCharLength { 412 return "", ErrLengthBeyondLimit.New(val, t.String()) 413 } 414 } else { 415 // TODO: this should count the string's length properly according to the character set 416 // convert 'val' string to rune to count the character length, not byte length 417 if int64(len([]rune(val))) > s.maxCharLength { 418 return "", ErrLengthBeyondLimit.New(val, t.String()) 419 } 420 } 421 } 422 423 if s.baseType == sqltypes.Binary { 424 val += strings2.Repeat(string([]byte{0}), int(s.maxCharLength)-len(val)) 425 } 426 427 return val, nil 428 } 429 430 // ConvertToCollatedString returns the given interface as a string, along with its collation. If the Type possess a 431 // collation, then that collation is returned. If the Type does not possess a collation (such as an integer), then the 432 // value is converted to a string and the default collation is used. If the value is already a string then no additional 433 // conversions are made. If the value is a byte slice then a non-copying conversion is made, which means that the 434 // original byte slice MUST NOT be modified after being passed to this function. If modifications need to be made, then 435 // you must allocate a new byte slice and pass that new one in. 436 func ConvertToCollatedString(val interface{}, typ sql.Type) (string, sql.CollationID, error) { 437 var content string 438 var collation sql.CollationID 439 var err error 440 if typeWithCollation, ok := typ.(sql.TypeWithCollation); ok { 441 collation = typeWithCollation.Collation() 442 if strVal, ok := val.(string); ok { 443 content = strVal 444 } else if byteVal, ok := val.([]byte); ok { 445 content = encodings.BytesToString(byteVal) 446 } else { 447 val, _, err = LongText.Convert(val) 448 if err != nil { 449 return "", sql.Collation_Unspecified, err 450 } 451 content = val.(string) 452 } 453 } else { 454 collation = sql.Collation_Default 455 val, _, err = LongText.Convert(val) 456 if err != nil { 457 return "", sql.Collation_Unspecified, err 458 } 459 content = val.(string) 460 } 461 return content, collation, nil 462 } 463 464 // MustConvert implements the Type interface. 465 func (t StringType) MustConvert(v interface{}) interface{} { 466 value, _, err := t.Convert(v) 467 if err != nil { 468 panic(err) 469 } 470 return value 471 } 472 473 // Equals implements the Type interface. 474 func (t StringType) Equals(otherType sql.Type) bool { 475 if ot, ok := otherType.(StringType); ok { 476 return t.baseType == ot.baseType && t.collation == ot.collation && t.maxCharLength == ot.maxCharLength 477 } 478 return false 479 } 480 481 // Promote implements the Type interface. 482 func (t StringType) Promote() sql.Type { 483 switch t.baseType { 484 case sqltypes.Char, sqltypes.VarChar, sqltypes.Text: 485 return MustCreateString(sqltypes.Text, LongTextBlobMax, t.collation) 486 case sqltypes.Binary, sqltypes.VarBinary, sqltypes.Blob: 487 return LongBlob 488 default: 489 panic(sql.ErrInvalidBaseType.New(t.baseType.String(), "string")) 490 } 491 } 492 493 // SQL implements Type interface. 494 func (t StringType) SQL(ctx *sql.Context, dest []byte, v interface{}) (sqltypes.Value, error) { 495 if v == nil { 496 return sqltypes.NULL, nil 497 } 498 499 var val []byte 500 if IsBinaryType(t) { 501 v, _, err := t.Convert(v) 502 if err != nil { 503 return sqltypes.Value{}, err 504 } 505 val = AppendAndSliceBytes(dest, v.([]byte)) 506 } else { 507 v, err := ConvertToString(v, t) 508 if err != nil { 509 return sqltypes.Value{}, err 510 } 511 resultCharset := ctx.GetCharacterSetResults() 512 if resultCharset == sql.CharacterSet_Unspecified || resultCharset == sql.CharacterSet_binary { 513 resultCharset = t.collation.CharacterSet() 514 } 515 encodedBytes, ok := resultCharset.Encoder().Encode(encodings.StringToBytes(v)) 516 if !ok { 517 snippet := v 518 if len(snippet) > 50 { 519 snippet = snippet[:50] 520 } 521 snippet = strings2.ToValidUTF8(snippet, string(utf8.RuneError)) 522 return sqltypes.Value{}, sql.ErrCharSetFailedToEncode.New(resultCharset.Name(), utf8.ValidString(v), snippet) 523 } 524 val = AppendAndSliceBytes(dest, encodedBytes) 525 } 526 527 return sqltypes.MakeTrusted(t.baseType, val), nil 528 } 529 530 // String implements Type interface. 531 func (t StringType) String() string { 532 return t.StringWithTableCollation(sql.Collation_Default) 533 } 534 535 // Type implements Type interface. 536 func (t StringType) Type() query.Type { 537 return t.baseType 538 } 539 540 // ValueType implements Type interface. 541 func (t StringType) ValueType() reflect.Type { 542 if IsBinaryType(t) { 543 return byteValueType 544 } 545 return stringValueType 546 } 547 548 // Zero implements Type interface. 549 func (t StringType) Zero() interface{} { 550 return "" 551 } 552 553 // CollationCoercibility implements sql.CollationCoercible interface. 554 func (t StringType) CollationCoercibility(ctx *sql.Context) (collation sql.CollationID, coercibility byte) { 555 return t.collation, 4 556 } 557 558 func (t StringType) CharacterSet() sql.CharacterSetID { 559 return t.collation.CharacterSet() 560 } 561 562 func (t StringType) Collation() sql.CollationID { 563 return t.collation 564 } 565 566 // StringWithTableCollation implements sql.TypeWithCollation interface. 567 func (t StringType) StringWithTableCollation(tableCollation sql.CollationID) string { 568 var s string 569 570 switch t.baseType { 571 case sqltypes.Char: 572 s = fmt.Sprintf("char(%v)", t.maxCharLength) 573 case sqltypes.Binary: 574 s = fmt.Sprintf("binary(%v)", t.maxCharLength) 575 case sqltypes.VarChar: 576 s = fmt.Sprintf("varchar(%v)", t.maxCharLength) 577 case sqltypes.VarBinary: 578 s = fmt.Sprintf("varbinary(%v)", t.maxCharLength) 579 case sqltypes.Text: 580 if t.maxByteLength <= TinyTextBlobMax { 581 s = "tinytext" 582 } else if t.maxByteLength <= TextBlobMax { 583 s = "text" 584 } else if t.maxByteLength <= MediumTextBlobMax { 585 s = "mediumtext" 586 } else { 587 s = "longtext" 588 } 589 case sqltypes.Blob: 590 if t.maxByteLength <= TinyTextBlobMax { 591 s = "tinyblob" 592 } else if t.maxByteLength <= TextBlobMax { 593 s = "blob" 594 } else if t.maxByteLength <= MediumTextBlobMax { 595 s = "mediumblob" 596 } else { 597 s = "longblob" 598 } 599 } 600 601 if t.CharacterSet() != sql.CharacterSet_binary { 602 if t.CharacterSet() != tableCollation.CharacterSet() { 603 s += " CHARACTER SET " + t.CharacterSet().String() 604 } 605 if t.collation != tableCollation { 606 s += " COLLATE " + t.collation.Name() 607 } 608 } 609 610 return s 611 } 612 613 // WithNewCollation implements TypeWithCollation interface. 614 func (t StringType) WithNewCollation(collation sql.CollationID) (sql.Type, error) { 615 // Blobs are special as, although they use collations, they don't change like a standard collated type 616 if t.baseType == sqltypes.Blob || t.baseType == sqltypes.Binary || t.baseType == sqltypes.VarBinary { 617 return t, nil 618 } 619 return CreateString(t.baseType, t.maxCharLength, collation) 620 } 621 622 // MaxCharacterLength is the maximum character length for this type. 623 func (t StringType) MaxCharacterLength() int64 { 624 return t.maxCharLength 625 } 626 627 // MaxByteLength is the maximum number of bytes that may be consumed by a string that conforms to this type. 628 func (t StringType) MaxByteLength() int64 { 629 return t.maxByteLength 630 } 631 632 // TODO: move me 633 func AppendAndSliceString(buffer []byte, addition string) (slice []byte) { 634 stop := len(buffer) 635 buffer = append(buffer, addition...) 636 slice = buffer[stop:] 637 return 638 } 639 640 func AppendAndSliceBytes(buffer, addition []byte) (slice []byte) { 641 stop := len(buffer) 642 buffer = append(buffer, addition...) 643 slice = buffer[stop:] 644 return 645 }