github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/src/runtime/string.go (about) 1 package runtime 2 3 // This file implements functions related to Go strings. 4 5 import ( 6 "unsafe" 7 ) 8 9 // The underlying struct for the Go string type. 10 type _string struct { 11 ptr *byte 12 length uintptr 13 } 14 15 // The iterator state for a range over a string. 16 type stringIterator struct { 17 byteindex uintptr 18 } 19 20 // Return true iff the strings match. 21 // 22 //go:nobounds 23 func stringEqual(x, y string) bool { 24 if len(x) != len(y) { 25 return false 26 } 27 for i := 0; i < len(x); i++ { 28 if x[i] != y[i] { 29 return false 30 } 31 } 32 return true 33 } 34 35 // Return true iff x < y. 36 // 37 //go:nobounds 38 func stringLess(x, y string) bool { 39 l := len(x) 40 if m := len(y); m < l { 41 l = m 42 } 43 for i := 0; i < l; i++ { 44 if x[i] < y[i] { 45 return true 46 } 47 if x[i] > y[i] { 48 return false 49 } 50 } 51 return len(x) < len(y) 52 } 53 54 // Add two strings together. 55 func stringConcat(x, y _string) _string { 56 if x.length == 0 { 57 return y 58 } else if y.length == 0 { 59 return x 60 } else { 61 length := x.length + y.length 62 buf := alloc(length, nil) 63 memcpy(buf, unsafe.Pointer(x.ptr), x.length) 64 memcpy(unsafe.Add(buf, x.length), unsafe.Pointer(y.ptr), y.length) 65 return _string{ptr: (*byte)(buf), length: length} 66 } 67 } 68 69 // Create a string from a []byte slice. 70 func stringFromBytes(x struct { 71 ptr *byte 72 len uintptr 73 cap uintptr 74 }) _string { 75 buf := alloc(x.len, nil) 76 memcpy(buf, unsafe.Pointer(x.ptr), x.len) 77 return _string{ptr: (*byte)(buf), length: x.len} 78 } 79 80 // Convert a string to a []byte slice. 81 func stringToBytes(x _string) (slice struct { 82 ptr *byte 83 len uintptr 84 cap uintptr 85 }) { 86 buf := alloc(x.length, nil) 87 memcpy(buf, unsafe.Pointer(x.ptr), x.length) 88 slice.ptr = (*byte)(buf) 89 slice.len = x.length 90 slice.cap = x.length 91 return 92 } 93 94 // Convert a []rune slice to a string. 95 func stringFromRunes(runeSlice []rune) (s _string) { 96 // Count the number of characters that will be in the string. 97 for _, r := range runeSlice { 98 _, numBytes := encodeUTF8(r) 99 s.length += numBytes 100 } 101 102 // Allocate memory for the string. 103 s.ptr = (*byte)(alloc(s.length, nil)) 104 105 // Encode runes to UTF-8 and store the resulting bytes in the string. 106 index := uintptr(0) 107 for _, r := range runeSlice { 108 array, numBytes := encodeUTF8(r) 109 for _, c := range array[:numBytes] { 110 *(*byte)(unsafe.Add(unsafe.Pointer(s.ptr), index)) = c 111 index++ 112 } 113 } 114 115 return 116 } 117 118 // Convert a string to []rune slice. 119 func stringToRunes(s string) []rune { 120 var n = 0 121 for range s { 122 n++ 123 } 124 var r = make([]rune, n) 125 n = 0 126 for _, e := range s { 127 r[n] = e 128 n++ 129 } 130 return r 131 } 132 133 // Create a string from a Unicode code point. 134 func stringFromUnicode(x rune) _string { 135 array, length := encodeUTF8(x) 136 // Array will be heap allocated. 137 // The heap most likely doesn't work with blocks below 4 bytes, so there's 138 // no point in allocating a smaller buffer for the string here. 139 return _string{ptr: (*byte)(unsafe.Pointer(&array)), length: length} 140 } 141 142 // Iterate over a string. 143 // Returns (ok, key, value). 144 func stringNext(s string, it *stringIterator) (bool, int, rune) { 145 if len(s) <= int(it.byteindex) { 146 return false, 0, 0 147 } 148 i := int(it.byteindex) 149 r, length := decodeUTF8(s, it.byteindex) 150 it.byteindex += length 151 return true, i, r 152 } 153 154 // Convert a Unicode code point into an array of bytes and its length. 155 func encodeUTF8(x rune) ([4]byte, uintptr) { 156 // https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16 157 // Note: this code can probably be optimized (in size and speed). 158 switch { 159 case x <= 0x7f: 160 return [4]byte{byte(x), 0, 0, 0}, 1 161 case x <= 0x7ff: 162 b1 := 0xc0 | byte(x>>6) 163 b2 := 0x80 | byte(x&0x3f) 164 return [4]byte{b1, b2, 0, 0}, 2 165 case 0xd800 <= x && x <= 0xdfff: 166 // utf-16 surrogates are replaced with "invalid code point" 167 return [4]byte{0xef, 0xbf, 0xbd, 0}, 3 168 case x <= 0xffff: 169 b1 := 0xe0 | byte(x>>12) 170 b2 := 0x80 | byte((x>>6)&0x3f) 171 b3 := 0x80 | byte((x>>0)&0x3f) 172 return [4]byte{b1, b2, b3, 0}, 3 173 case x <= 0x10ffff: 174 b1 := 0xf0 | byte(x>>18) 175 b2 := 0x80 | byte((x>>12)&0x3f) 176 b3 := 0x80 | byte((x>>6)&0x3f) 177 b4 := 0x80 | byte((x>>0)&0x3f) 178 return [4]byte{b1, b2, b3, b4}, 4 179 default: 180 // Invalid Unicode code point. 181 return [4]byte{0xef, 0xbf, 0xbd, 0}, 3 182 } 183 } 184 185 // Decode a single UTF-8 character from a string. 186 // 187 //go:nobounds 188 func decodeUTF8(s string, index uintptr) (rune, uintptr) { 189 remaining := uintptr(len(s)) - index // must be >= 1 before calling this function 190 x := s[index] 191 switch { 192 case x&0x80 == 0x00: // 0xxxxxxx 193 return rune(x), 1 194 case x&0xe0 == 0xc0: // 110xxxxx 195 if remaining < 2 || !isContinuation(s[index+1]) { 196 return 0xfffd, 1 197 } 198 r := (rune(x&0x1f) << 6) | (rune(s[index+1]) & 0x3f) 199 if r >= 1<<7 { 200 // Check whether the rune really needed to be encoded as a two-byte 201 // sequence. UTF-8 requires every rune to be encoded in the smallest 202 // sequence possible. 203 return r, 2 204 } 205 case x&0xf0 == 0xe0: // 1110xxxx 206 if remaining < 3 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) { 207 return 0xfffd, 1 208 } 209 r := (rune(x&0x0f) << 12) | ((rune(s[index+1]) & 0x3f) << 6) | (rune(s[index+2]) & 0x3f) 210 if r >= 1<<11 && !(r >= 0xD800 && r <= 0xDFFF) { 211 // Check whether the rune really needed to be encoded as a 212 // three-byte sequence and check that this is not a Unicode 213 // surrogate pair (which are not allowed by UTF-8). 214 return r, 3 215 } 216 case x&0xf8 == 0xf0: // 11110xxx 217 if remaining < 4 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) || !isContinuation(s[index+3]) { 218 return 0xfffd, 1 219 } 220 r := (rune(x&0x07) << 18) | ((rune(s[index+1]) & 0x3f) << 12) | ((rune(s[index+2]) & 0x3f) << 6) | (rune(s[index+3]) & 0x3f) 221 if r >= 1<<16 && r <= '\U0010FFFF' { 222 // Check whether this rune really needed to be encoded as a four 223 // byte sequence and check that the resulting rune is in the valid 224 // range (up to at most U+10FFFF). 225 return r, 4 226 } 227 } 228 229 // Failed to decode. Return the Unicode replacement character and a length of 1. 230 return 0xfffd, 1 231 } 232 233 // isContinuation returns true if (and only if) this is a UTF-8 continuation 234 // byte. 235 func isContinuation(b byte) bool { 236 // Continuation bytes have their topmost bits set to 0b10. 237 return b&0xc0 == 0x80 238 } 239 240 // Functions used in CGo. 241 242 // Convert a Go string to a C string. 243 func cgo_CString(s _string) unsafe.Pointer { 244 buf := malloc(s.length + 1) 245 memcpy(buf, unsafe.Pointer(s.ptr), s.length) 246 *(*byte)(unsafe.Add(buf, s.length)) = 0 // trailing 0 byte 247 return buf 248 } 249 250 // Convert a C string to a Go string. 251 func cgo_GoString(cstr unsafe.Pointer) _string { 252 if cstr == nil { 253 return _string{} 254 } 255 return makeGoString(cstr, strlen(cstr)) 256 } 257 258 // Convert a C data buffer to a Go string (that possibly contains 0 bytes). 259 func cgo_GoStringN(cstr unsafe.Pointer, length uintptr) _string { 260 return makeGoString(cstr, length) 261 } 262 263 // Make a Go string given a source buffer and a length. 264 func makeGoString(cstr unsafe.Pointer, length uintptr) _string { 265 s := _string{ 266 length: length, 267 } 268 if s.length != 0 { 269 buf := make([]byte, s.length) 270 s.ptr = &buf[0] 271 memcpy(unsafe.Pointer(s.ptr), cstr, s.length) 272 } 273 return s 274 } 275 276 // Convert a C data buffer to a Go byte slice. 277 func cgo_GoBytes(ptr unsafe.Pointer, length uintptr) []byte { 278 // Note: don't return nil if length is 0, to match the behavior of C.GoBytes 279 // of upstream Go. 280 buf := make([]byte, length) 281 if length != 0 { 282 memcpy(unsafe.Pointer(&buf[0]), ptr, uintptr(length)) 283 } 284 return buf 285 }