github.com/dolthub/go-mysql-server@v0.18.0/internal/strings/unquote.go (about) 1 package strings 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "encoding/hex" 7 "fmt" 8 "unicode/utf8" 9 ) 10 11 // The implementation is taken from TiDB 12 // https://github.com/pingcap/tidb/blob/a594287e9f402037b06930026906547000006bb6/types/json/binary_functions.go#L89 13 func Unquote(s string) (string, error) { 14 ret := new(bytes.Buffer) 15 for i := 0; i < len(s); i++ { 16 if s[i] == '\\' { 17 i++ 18 if i == len(s) { 19 return "", fmt.Errorf("Missing a closing quotation mark in string") 20 } 21 switch s[i] { 22 case '"': 23 ret.WriteByte('"') 24 case 'b': 25 ret.WriteByte('\b') 26 case 'f': 27 ret.WriteByte('\f') 28 case 'n': 29 ret.WriteByte('\n') 30 case 'r': 31 ret.WriteByte('\r') 32 case 't': 33 ret.WriteByte('\t') 34 case '\\': 35 ret.WriteByte('\\') 36 case 'u': 37 if i+4 > len(s) { 38 return "", fmt.Errorf("Invalid unicode: %s", s[i+1:]) 39 } 40 char, size, err := decodeEscapedUnicode([]byte(s[i+1 : i+5])) 41 if err != nil { 42 return "", err 43 } 44 ret.Write(char[0:size]) 45 i += 4 46 default: 47 // For all other escape sequences, backslash is ignored. 48 ret.WriteByte(s[i]) 49 } 50 } else { 51 ret.WriteByte(s[i]) 52 } 53 } 54 55 str := ret.String() 56 strlen := len(str) 57 // Remove prefix and suffix '"'. 58 if strlen > 1 { 59 head, tail := str[0], str[strlen-1] 60 if head == '"' && tail == '"' { 61 return str[1 : strlen-1], nil 62 } 63 } 64 return str, nil 65 } 66 67 // decodeEscapedUnicode decodes unicode into utf8 bytes specified in RFC 3629. 68 // According RFC 3629, the max length of utf8 characters is 4 bytes. 69 // And MySQL use 4 bytes to represent the unicode which must be in [0, 65536). 
// decodeEscapedUnicode decodes the four hexadecimal digits of a \uXXXX escape
// into the UTF-8 bytes (RFC 3629) of the code point they name.
//
// s must hold exactly four hex digits, so the code point is always in
// [0, 65536). The encoded bytes are returned in char[:size]; bytes past size
// are not meaningful.
//
// An error is returned when s is not four bytes long, when it contains a
// non-hexadecimal character, or when the value is a UTF-16 surrogate half
// (U+D800..U+DFFF), which has no UTF-8 encoding. On error size is 0.
//
// The implementation is taken from TiDB:
// https://github.com/pingcap/tidb/blob/a594287e9f402037b06930026906547000006bb6/types/json/binary_functions.go#L136
func decodeEscapedUnicode(s []byte) (char [4]byte, size int, err error) {
	// hex.Decode writes len(s)/2 bytes; anything other than four digits would
	// either under-fill or overrun the two-byte destination.
	if len(s) != 4 {
		return char, 0, fmt.Errorf("Invalid unicode: %s", s)
	}

	var raw [2]byte
	if _, err = hex.Decode(raw[:], s); err != nil {
		return char, 0, err
	}

	r := rune(binary.BigEndian.Uint16(raw[:]))
	size = utf8.RuneLen(r)
	if size < 0 {
		// utf8.RuneLen reports -1 for surrogate halves; slicing with that
		// length would panic, so reject the escape instead.
		return char, 0, fmt.Errorf("Invalid unicode: %s", s)
	}
	utf8.EncodeRune(char[:size], r)
	return char, size, nil
}