github.com/nuvolaris/goja@v0.0.0-20230825100449-967811910c6d/unistring/string.go (about) 1 // Package unistring contains an implementation of a hybrid ASCII/UTF-16 string. 2 // For ASCII strings the underlying representation is equivalent to a normal Go string. 3 // For unicode strings the underlying representation is UTF-16 as []uint16 with 0th element set to 0xFEFF. 4 // unistring.String allows representing malformed UTF-16 values (e.g. stand-alone parts of surrogate pairs) 5 // which cannot be represented in UTF-8. 6 // At the same time it is possible to use unistring.String as property keys just as efficiently as simple strings, 7 // (the leading 0xFEFF ensures there is no clash with ASCII strings), and it is possible to convert it 8 // to valueString without extra allocations. 9 package unistring 10 11 import ( 12 "reflect" 13 "unicode/utf16" 14 "unicode/utf8" 15 "unsafe" 16 ) 17 18 const ( 19 BOM = 0xFEFF 20 ) 21 22 type String string 23 24 // Scan checks if the string contains any non-ASCII characters. If it does, converts it to a slice suitable for creating 25 // a String using FromUtf16, otherwise returns nil. 
26 func Scan(s string) []uint16 { 27 utf16Size := 0 28 for ; utf16Size < len(s); utf16Size++ { 29 if s[utf16Size] >= utf8.RuneSelf { 30 goto unicode 31 } 32 } 33 return nil 34 unicode: 35 for _, chr := range s[utf16Size:] { 36 utf16Size++ 37 if chr > 0xFFFF { 38 utf16Size++ 39 } 40 } 41 42 buf := make([]uint16, utf16Size+1) 43 buf[0] = BOM 44 c := 1 45 for _, chr := range s { 46 if chr <= 0xFFFF { 47 buf[c] = uint16(chr) 48 } else { 49 first, second := utf16.EncodeRune(chr) 50 buf[c] = uint16(first) 51 c++ 52 buf[c] = uint16(second) 53 } 54 c++ 55 } 56 57 return buf 58 } 59 60 func NewFromString(s string) String { 61 if buf := Scan(s); buf != nil { 62 return FromUtf16(buf) 63 } 64 return String(s) 65 } 66 67 func NewFromRunes(s []rune) String { 68 ascii := true 69 size := 0 70 for _, c := range s { 71 if c >= utf8.RuneSelf { 72 ascii = false 73 if c > 0xFFFF { 74 size++ 75 } 76 } 77 size++ 78 } 79 if ascii { 80 return String(s) 81 } 82 b := make([]uint16, size+1) 83 b[0] = BOM 84 i := 1 85 for _, c := range s { 86 if c <= 0xFFFF { 87 b[i] = uint16(c) 88 } else { 89 first, second := utf16.EncodeRune(c) 90 b[i] = uint16(first) 91 i++ 92 b[i] = uint16(second) 93 } 94 i++ 95 } 96 return FromUtf16(b) 97 } 98 99 func FromUtf16(b []uint16) String { 100 var str string 101 hdr := (*reflect.StringHeader)(unsafe.Pointer(&str)) 102 hdr.Data = uintptr(unsafe.Pointer(&b[0])) 103 hdr.Len = len(b) * 2 104 105 return String(str) 106 } 107 108 func (s String) String() string { 109 if b := s.AsUtf16(); b != nil { 110 return string(utf16.Decode(b[1:])) 111 } 112 113 return string(s) 114 } 115 116 func (s String) AsUtf16() []uint16 { 117 if len(s) < 4 || len(s)&1 != 0 { 118 return nil 119 } 120 121 var a []uint16 122 raw := string(s) 123 124 sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&a)) 125 sliceHeader.Data = (*reflect.StringHeader)(unsafe.Pointer(&raw)).Data 126 127 l := len(raw) / 2 128 129 sliceHeader.Len = l 130 sliceHeader.Cap = l 131 132 if a[0] == BOM { 133 return a 
134 } 135 136 return nil 137 }