github.com/iDigitalFlame/xmt@v0.5.4/device/winapi/utf16.go (about) 1 // Copyright (C) 2020 - 2023 iDigitalFlame 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU General Public License as published by 5 // the Free Software Foundation, either version 3 of the License, or 6 // any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU General Public License for more details. 12 // 13 // You should have received a copy of the GNU General Public License 14 // along with this program. If not, see <https://www.gnu.org/licenses/>. 15 // 16 17 // Package winapi is a Windows specific package that assists with handeling or 18 // accessing many advanced WinAPI functions. This package also contains various 19 // helper functions to assist with complex operations. 20 package winapi 21 22 import ( 23 "syscall" 24 "unsafe" 25 ) 26 27 const ( 28 utfSelf = 0x10000 29 utfSurgA = 0xd800 30 utfSurgB = 0xdc00 31 utfSurgC = 0xe000 32 utfRuneMax = rune(0x10FFFF) // '\U0010FFFF' 33 utfReplacement = rune(0xFFFD) // '\uFFFD' 34 ) 35 36 // SliceHeader is the runtime representation of a slice. 37 // 38 // It cannot be used safely or portably and its representation may change in a 39 // later release. 40 // ^ Hey, shut up. 41 type SliceHeader struct { 42 Data unsafe.Pointer 43 Len int 44 Cap int 45 } 46 47 // FnvHash returns the fnv32 hash of the supplied string value. 48 func FnvHash(n string) uint32 { 49 h := uint32(2166136261) 50 for i := range n { 51 h *= 16777619 52 h ^= uint32(n[i]) 53 } 54 return h 55 } 56 57 // UTF16Decode returns the Unicode code point sequence represented by the UTF-16 58 // encoding rune values supplied. 59 func UTF16Decode(s []uint16) []rune { 60 var ( 61 b = make([]rune, len(s)) 62 n int 63 ) 64 loop: 65 for i := 0; i < len(s); i++ { 66 switch r := s[i]; { 67 case r == 0: 68 break loop 69 case r < utfSurgA, utfSurgC <= r: 70 b[n] = rune(r) 71 case utfSurgA <= r && r < utfSurgB && i+1 < len(s) && utfSurgB <= s[i+1] && s[i+1] < utfSurgC: 72 b[n] = utf16DecodeRune(rune(r), rune(s[i+1])) 73 i++ 74 default: 75 b[n] = utfReplacement 76 } 77 n++ 78 } 79 return b[:n] 80 } 81 82 // UTF16ToString returns the UTF-8 encoding of the UTF-16 sequence s, with a 83 // terminating NUL and any bytes after the NUL removed. 84 func UTF16ToString(s []uint16) string { 85 return string(UTF16Decode(s)) 86 } 87 func utf16DecodeRune(r1, r2 rune) rune { 88 if utfSurgA <= r1 && r1 < utfSurgB && utfSurgB <= r2 && r2 < utfSurgC { 89 return (r1-utfSurgA)<<10 | (r2 - utfSurgB) + utfSelf 90 } 91 return utfReplacement 92 } 93 94 // UTF16EncodeStd encodes the runes into a UTF16 array and ignores zero points. 95 // 96 // This is ONLY safe to use if you know what you're doing. 97 func UTF16EncodeStd(s []rune) []uint16 { 98 n := len(s) 99 for i := range s { 100 if s[i] < utfSelf { 101 continue 102 } 103 n++ 104 } 105 var ( 106 b = make([]uint16, n) 107 i int 108 ) 109 for n = 0; n < len(s); i++ { 110 switch { 111 case 0 <= s[i] && s[i] < utfSurgA, utfSurgC <= s[i] && s[i] < utfSelf: 112 b[n] = uint16(s[i]) 113 n++ 114 case utfSelf <= s[i] && s[i] <= utfRuneMax: 115 b[n], b[n+1] = utf16EncodeRune(s[i]) 116 n += 2 117 default: 118 b[n] = uint16(utfReplacement) 119 n++ 120 } 121 } 122 return b[:n] 123 } 124 125 // UTF16PtrToString takes a pointer to a UTF-16 sequence and returns the 126 // corresponding UTF-8 encoded string. 127 // 128 // If the pointer is nil, it returns the empty string. It assumes that the UTF-16 129 // sequence is terminated at a zero word; if the zero word is not present, the 130 // program may crash. 131 func UTF16PtrToString(p *uint16) string { 132 if p == nil || *p == 0 { 133 return "" 134 } 135 n := 0 136 for v := unsafe.Pointer(p); *(*uint16)(v) != 0; n++ { 137 v = unsafe.Pointer(uintptr(v) + unsafe.Sizeof(*p)) 138 } 139 var s []uint16 140 h := (*SliceHeader)(unsafe.Pointer(&s)) 141 h.Data, h.Len, h.Cap = unsafe.Pointer(p), n, n 142 return string(UTF16Decode(s)) 143 } 144 func utf16Encode(s []rune) ([]uint16, error) { 145 n := len(s) 146 for i := range s { 147 if s[i] == 0 && i+1 < len(s) { 148 return nil, syscall.EINVAL 149 } 150 if s[i] < utfSelf { 151 continue 152 } 153 n++ 154 } 155 var ( 156 b = make([]uint16, n) 157 i int 158 ) 159 for n = 0; n < len(s); i++ { 160 switch { 161 case s[i] == 0 && i+1 < len(s): 162 return nil, syscall.EINVAL 163 case 0 <= s[i] && s[i] < utfSurgA, utfSurgC <= s[i] && s[i] < utfSelf: 164 b[n] = uint16(s[i]) 165 n++ 166 case utfSelf <= s[i] && s[i] <= utfRuneMax: 167 b[n], b[n+1] = utf16EncodeRune(s[i]) 168 n += 2 169 default: 170 b[n] = uint16(utfReplacement) 171 n++ 172 } 173 } 174 return b[:n], nil 175 } 176 func utf16EncodeRune(r rune) (uint16, uint16) { 177 if r < utfSelf || r > utfRuneMax { 178 return uint16(utfReplacement), uint16(utfReplacement) 179 } 180 r -= utfSelf 181 return uint16(utfSurgA + (r>>10)&0x3FF), uint16(utfSurgB + r&0x3FF) 182 } 183 184 // UTF16FromString returns the UTF-16 encoding of the UTF-8 string with a 185 // terminating NUL added. 186 // 187 // If the string contains a NUL byte at any location, it returns syscall.EINVAL. 188 func UTF16FromString(s string) ([]uint16, error) { 189 if len(s) == 0 { 190 return []uint16{0}, nil 191 } 192 return utf16Encode([]rune(s + "\x00")) 193 } 194 195 // UTF16PtrFromString returns pointer to the UTF-16 encoding of the UTF-8 string, 196 // with a terminating NUL added. 197 // 198 // If the string contains a NUL byte at any location, it returns syscall.EINVAL. 199 func UTF16PtrFromString(s string) (*uint16, error) { 200 a, err := UTF16FromString(s) 201 if err != nil { 202 return nil, err 203 } 204 return &a[0], nil 205 }