// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package encodings

import (
	"reflect"
	"unsafe"
)

// Encoder is used to transcode from one encoding to another, along with handling uppercase and lowercase conversions.
// Decoding always converts to Go's string encoding, while encoding always converts to the target encoding. Encoding and
// decoding are bidirectional.
//
// NOTE(review): the boolean returned by Decode, Encode, DecodeRune, and EncodeRune is not described here. It presumably
// reports whether the conversion succeeded (EncodeReplaceUnknown exists so that "all encodings will return a result");
// confirm against the implementations.
type Encoder interface {
	// Decode converts from the encoding represented by this Encoder to Go's string encoding (utf8mb4-equivalent). This is
	// intended for decoding whole strings (that are represented as byte slices); to decode individual codepoints use
	// DecodeRune. Do note that the string parameter is NOT modified in any way. Refer to IsReturnSafe to check if the
	// returned byte slice must be copied before modifications may be made.
	Decode(str []byte) ([]byte, bool)
	// Encode converts from Go's string encoding (utf8mb4-equivalent) to the encoding represented by this Encoder. This is
	// intended for encoding whole strings (that are represented as byte slices); to encode individual codepoints use
	// EncodeRune. Do note that the string parameter is NOT modified in any way. Refer to IsReturnSafe to check if the
	// returned byte slice must be copied before modifications may be made.
	Encode(str []byte) ([]byte, bool)
	// EncodeReplaceUnknown converts from Go's string encoding (utf8mb4-equivalent) to the encoding represented by this
	// Encoder. This is intended for encoding whole strings (that are represented as byte slices); to encode individual
	// codepoints use EncodeRune. Do note that the string parameter is NOT modified in any way. Refer to IsReturnSafe to
	// check if the returned byte slice must be copied before modifications may be made. Unlike the standard Encode
	// function, this will replace unknown sequences with a question mark (?), meaning that all encodings will return a
	// result.
	EncodeReplaceUnknown(str []byte) []byte
	// DecodeRune converts from the encoding represented by this Encoder to Go's rune encoding (utf8mb4-equivalent).
	// Refer to IsReturnSafe to check if the returned byte slice must be copied before modifications may be made.
	DecodeRune(r []byte) ([]byte, bool)
	// EncodeRune converts from Go's rune encoding (utf8mb4-equivalent) to the encoding represented by this Encoder.
	// Refer to IsReturnSafe to check if the returned byte slice must be copied before modifications may be made.
	EncodeRune(r []byte) ([]byte, bool)
	// Uppercase returns a new string with all codepoints converted to their uppercase variants as determined by this
	// Encoder.
	Uppercase(str string) string
	// Lowercase returns a new string with all codepoints converted to their lowercase variants as determined by this
	// Encoder.
	Lowercase(str string) string
	// UppercaseRune returns the uppercase variant of the given rune. If the rune does not have such a variant, then the
	// input rune is returned.
	UppercaseRune(r rune) rune
	// LowercaseRune returns the lowercase variant of the given rune. If the rune does not have such a variant, then the
	// input rune is returned.
	LowercaseRune(r rune) rune
	// NextRune returns the next rune of a string that was decoded by this encoder. This is ONLY intended for sorting
	// both character strings and binary strings from a single code path. All non-binary strings will use
	// utf8.DecodeRuneInString internally, therefore it is recommended that all performance-critical code handles binary
	// strings separately, and uses utf8.DecodeRuneInString without having to go through this interface.
	NextRune(str string) (rune, int)
	// IsReturnSafe returns whether it is safe to modify the byte slices returned by Decode, Encode, DecodeRune, and
	// EncodeRune.
	IsReturnSafe() bool
}

// BytesToString returns the byte slice (representing a valid Go/utf8mb4-encoded string) as a string without allocations.
// After this call is made, no further changes should be made to the byte slice, as strings are supposed to be immutable.
// Alterations could lead to undefined behavior. This properly handles nil and empty byte slices, both of which yield the
// empty string.
func BytesToString(str []byte) string {
	// Empty slices may not allocate a backing array (and nil slices definitely do not), so we have to check first
	if len(str) == 0 {
		return ""
	}
	// Reinterpret the slice value in place as a string value, so the result refers to the slice's existing bytes rather
	// than to a copy of them.
	return *(*string)(unsafe.Pointer(&str))
}

// StringToBytes returns the string as a byte slice without allocations. No changes should be made to the returned byte
// slice, as strings are supposed to be immutable. Alterations could lead to undefined behavior. This is only intended
// to allow strings to be passed to any functions that work on string data as a byte slice, and specifically do not
// modify the byte slice. This properly handles empty strings.
func StringToBytes(str string) []byte {
	// An empty string may not have a backing array, so there is nothing to alias. Return a non-nil, zero-length slice
	// since the input is a non-nil (although empty) string.
	if len(str) == 0 {
		return []byte{}
	}
	// Take the data pointer out of the string's header and build a slice over the same bytes. The header's Data field
	// is converted to unsafe.Pointer within a single expression, as the unsafe.Pointer rules require. unsafe.Slice sets
	// both the length and the capacity to len(str), and, unlike a cast through a fixed-size array pointer, it places no
	// upper bound on the string length, so very large strings no longer panic when sliced.
	return unsafe.Slice(
		(*byte)(unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&str)).Data)),
		len(str),
	)
}