github.com/dolthub/go-mysql-server@v0.18.0/internal/strings/unquote.go (about)

     1  package strings
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"encoding/hex"
     7  	"fmt"
     8  	"unicode/utf8"
     9  )
    10  
    11  // The implementation is taken from TiDB
    12  // https://github.com/pingcap/tidb/blob/a594287e9f402037b06930026906547000006bb6/types/json/binary_functions.go#L89
    13  func Unquote(s string) (string, error) {
    14  	ret := new(bytes.Buffer)
    15  	for i := 0; i < len(s); i++ {
    16  		if s[i] == '\\' {
    17  			i++
    18  			if i == len(s) {
    19  				return "", fmt.Errorf("Missing a closing quotation mark in string")
    20  			}
    21  			switch s[i] {
    22  			case '"':
    23  				ret.WriteByte('"')
    24  			case 'b':
    25  				ret.WriteByte('\b')
    26  			case 'f':
    27  				ret.WriteByte('\f')
    28  			case 'n':
    29  				ret.WriteByte('\n')
    30  			case 'r':
    31  				ret.WriteByte('\r')
    32  			case 't':
    33  				ret.WriteByte('\t')
    34  			case '\\':
    35  				ret.WriteByte('\\')
    36  			case 'u':
    37  				if i+4 > len(s) {
    38  					return "", fmt.Errorf("Invalid unicode: %s", s[i+1:])
    39  				}
    40  				char, size, err := decodeEscapedUnicode([]byte(s[i+1 : i+5]))
    41  				if err != nil {
    42  					return "", err
    43  				}
    44  				ret.Write(char[0:size])
    45  				i += 4
    46  			default:
    47  				// For all other escape sequences, backslash is ignored.
    48  				ret.WriteByte(s[i])
    49  			}
    50  		} else {
    51  			ret.WriteByte(s[i])
    52  		}
    53  	}
    54  
    55  	str := ret.String()
    56  	strlen := len(str)
    57  	// Remove prefix and suffix '"'.
    58  	if strlen > 1 {
    59  		head, tail := str[0], str[strlen-1]
    60  		if head == '"' && tail == '"' {
    61  			return str[1 : strlen-1], nil
    62  		}
    63  	}
    64  	return str, nil
    65  }
    66  
    67  // decodeEscapedUnicode decodes unicode into utf8 bytes specified in RFC 3629.
    68  // According RFC 3629, the max length of utf8 characters is 4 bytes.
    69  // And MySQL use 4 bytes to represent the unicode which must be in [0, 65536).
    70  // The implementation is taken from TiDB:
    71  // https://github.com/pingcap/tidb/blob/a594287e9f402037b06930026906547000006bb6/types/json/binary_functions.go#L136
    72  func decodeEscapedUnicode(s []byte) (char [4]byte, size int, err error) {
    73  	size, err = hex.Decode(char[0:2], s)
    74  	if err != nil || size != 2 {
    75  		// The unicode must can be represented in 2 bytes.
    76  		return char, 0, err
    77  	}
    78  	var unicode uint16
    79  	err = binary.Read(bytes.NewReader(char[0:2]), binary.BigEndian, &unicode)
    80  	if err != nil {
    81  		return char, 0, err
    82  	}
    83  	size = utf8.RuneLen(rune(unicode))
    84  	utf8.EncodeRune(char[0:size], rune(unicode))
    85  	return
    86  }