github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/src/runtime/string.go

github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/src/runtime/string.go (about)

     1  package runtime
     2  
     3  // This file implements functions related to Go strings.
     4  
     5  import (
     6  	"unsafe"
     7  )
     8  
     9  // The underlying struct for the Go string type.
    10  type _string struct {
    11  	ptr    *byte   // first byte of the string data; used as the memcpy source/destination by the functions in this file
    12  	length uintptr // size of the string data in bytes
    13  }
    14  
    15  // The iterator state for a range over a string.
    16  type stringIterator struct {
    17  	byteindex uintptr // byte offset into the string at which the next rune is decoded; advanced by stringNext
    18  }
    19  
    20  // Return true iff the strings match.
    21  //
    22  //go:nobounds
    23  func stringEqual(x, y string) bool {
    24  	if len(x) != len(y) {
    25  		return false
    26  	}
    27  	for i := 0; i < len(x); i++ {
    28  		if x[i] != y[i] {
    29  			return false
    30  		}
    31  	}
    32  	return true
    33  }
    34  
    35  // Return true iff x < y.
    36  //
    37  //go:nobounds
    38  func stringLess(x, y string) bool {
    39  	l := len(x)
    40  	if m := len(y); m < l {
    41  		l = m
    42  	}
    43  	for i := 0; i < l; i++ {
    44  		if x[i] < y[i] {
    45  			return true
    46  		}
    47  		if x[i] > y[i] {
    48  			return false
    49  		}
    50  	}
    51  	return len(x) < len(y)
    52  }
    53  
    54  // Add two strings together.
    55  func stringConcat(x, y _string) _string {
    56  	if x.length == 0 {
    57  		return y
    58  	} else if y.length == 0 {
    59  		return x
    60  	} else {
    61  		length := x.length + y.length
    62  		buf := alloc(length, nil)
    63  		memcpy(buf, unsafe.Pointer(x.ptr), x.length)
    64  		memcpy(unsafe.Add(buf, x.length), unsafe.Pointer(y.ptr), y.length)
    65  		return _string{ptr: (*byte)(buf), length: length}
    66  	}
    67  }
    68  
    69  // Create a string from a []byte slice.
    70  func stringFromBytes(x struct {
    71  	ptr *byte
    72  	len uintptr
    73  	cap uintptr
    74  }) _string {
    75  	buf := alloc(x.len, nil)
    76  	memcpy(buf, unsafe.Pointer(x.ptr), x.len)
    77  	return _string{ptr: (*byte)(buf), length: x.len}
    78  }
    79  
    80  // Convert a string to a []byte slice.
    81  func stringToBytes(x _string) (slice struct {
    82  	ptr *byte
    83  	len uintptr
    84  	cap uintptr
    85  }) {
    86  	buf := alloc(x.length, nil)
    87  	memcpy(buf, unsafe.Pointer(x.ptr), x.length)
    88  	slice.ptr = (*byte)(buf)
    89  	slice.len = x.length
    90  	slice.cap = x.length
    91  	return
    92  }
    93  
    94  // Convert a []rune slice to a string.
    95  func stringFromRunes(runeSlice []rune) (s _string) {
    96  	// Count the number of characters that will be in the string.
    97  	for _, r := range runeSlice {
    98  		_, numBytes := encodeUTF8(r)
    99  		s.length += numBytes
   100  	}
   101  
   102  	// Allocate memory for the string.
   103  	s.ptr = (*byte)(alloc(s.length, nil))
   104  
   105  	// Encode runes to UTF-8 and store the resulting bytes in the string.
   106  	index := uintptr(0)
   107  	for _, r := range runeSlice {
   108  		array, numBytes := encodeUTF8(r)
   109  		for _, c := range array[:numBytes] {
   110  			*(*byte)(unsafe.Add(unsafe.Pointer(s.ptr), index)) = c
   111  			index++
   112  		}
   113  	}
   114  
   115  	return
   116  }
   117  
   118  // Convert a string to []rune slice.
   119  func stringToRunes(s string) []rune {
   120  	var n = 0
   121  	for range s {
   122  		n++
   123  	}
   124  	var r = make([]rune, n)
   125  	n = 0
   126  	for _, e := range s {
   127  		r[n] = e
   128  		n++
   129  	}
   130  	return r
   131  }
   132  
   133  // Create a string from a Unicode code point.
   134  func stringFromUnicode(x rune) _string {
   135  	array, length := encodeUTF8(x)
   136  	// Array will be heap allocated.
   137  	// The heap most likely doesn't work with blocks below 4 bytes, so there's
   138  	// no point in allocating a smaller buffer for the string here.
   139  	return _string{ptr: (*byte)(unsafe.Pointer(&array)), length: length}
   140  }
   141  
   142  // Iterate over a string.
   143  // Returns (ok, key, value).
   144  func stringNext(s string, it *stringIterator) (bool, int, rune) {
   145  	if len(s) <= int(it.byteindex) {
   146  		return false, 0, 0
   147  	}
   148  	i := int(it.byteindex)
   149  	r, length := decodeUTF8(s, it.byteindex)
   150  	it.byteindex += length
   151  	return true, i, r
   152  }
   153  
   154  // Convert a Unicode code point into an array of bytes and its length.
   155  func encodeUTF8(x rune) ([4]byte, uintptr) {
   156  	// https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
   157  	// Note: this code can probably be optimized (in size and speed).
   158  	switch {
   159  	case x <= 0x7f:
   160  		return [4]byte{byte(x), 0, 0, 0}, 1
   161  	case x <= 0x7ff:
   162  		b1 := 0xc0 | byte(x>>6)
   163  		b2 := 0x80 | byte(x&0x3f)
   164  		return [4]byte{b1, b2, 0, 0}, 2
   165  	case 0xd800 <= x && x <= 0xdfff:
   166  		// utf-16 surrogates are replaced with "invalid code point"
   167  		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
   168  	case x <= 0xffff:
   169  		b1 := 0xe0 | byte(x>>12)
   170  		b2 := 0x80 | byte((x>>6)&0x3f)
   171  		b3 := 0x80 | byte((x>>0)&0x3f)
   172  		return [4]byte{b1, b2, b3, 0}, 3
   173  	case x <= 0x10ffff:
   174  		b1 := 0xf0 | byte(x>>18)
   175  		b2 := 0x80 | byte((x>>12)&0x3f)
   176  		b3 := 0x80 | byte((x>>6)&0x3f)
   177  		b4 := 0x80 | byte((x>>0)&0x3f)
   178  		return [4]byte{b1, b2, b3, b4}, 4
   179  	default:
   180  		// Invalid Unicode code point.
   181  		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
   182  	}
   183  }
   184  
   185  // Decode a single UTF-8 character from a string.
   186  //
   187  //go:nobounds
   188  func decodeUTF8(s string, index uintptr) (rune, uintptr) {
   189  	remaining := uintptr(len(s)) - index // must be >= 1 before calling this function
   190  	x := s[index]
   191  	switch {
   192  	case x&0x80 == 0x00: // 0xxxxxxx
   193  		return rune(x), 1
   194  	case x&0xe0 == 0xc0: // 110xxxxx
   195  		if remaining < 2 || !isContinuation(s[index+1]) {
   196  			return 0xfffd, 1
   197  		}
   198  		r := (rune(x&0x1f) << 6) | (rune(s[index+1]) & 0x3f)
   199  		if r >= 1<<7 {
   200  			// Check whether the rune really needed to be encoded as a two-byte
   201  			// sequence. UTF-8 requires every rune to be encoded in the smallest
   202  			// sequence possible.
   203  			return r, 2
   204  		}
   205  	case x&0xf0 == 0xe0: // 1110xxxx
   206  		if remaining < 3 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) {
   207  			return 0xfffd, 1
   208  		}
   209  		r := (rune(x&0x0f) << 12) | ((rune(s[index+1]) & 0x3f) << 6) | (rune(s[index+2]) & 0x3f)
   210  		if r >= 1<<11 && !(r >= 0xD800 && r <= 0xDFFF) {
   211  			// Check whether the rune really needed to be encoded as a
   212  			// three-byte sequence and check that this is not a Unicode
   213  			// surrogate pair (which are not allowed by UTF-8).
   214  			return r, 3
   215  		}
   216  	case x&0xf8 == 0xf0: // 11110xxx
   217  		if remaining < 4 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) || !isContinuation(s[index+3]) {
   218  			return 0xfffd, 1
   219  		}
   220  		r := (rune(x&0x07) << 18) | ((rune(s[index+1]) & 0x3f) << 12) | ((rune(s[index+2]) & 0x3f) << 6) | (rune(s[index+3]) & 0x3f)
   221  		if r >= 1<<16 && r <= '\U0010FFFF' {
   222  			// Check whether this rune really needed to be encoded as a four
   223  			// byte sequence and check that the resulting rune is in the valid
   224  			// range (up to at most U+10FFFF).
   225  			return r, 4
   226  		}
   227  	}
   228  
   229  	// Failed to decode. Return the Unicode replacement character and a length of 1.
   230  	return 0xfffd, 1
   231  }
   232  
   233  // isContinuation returns true if (and only if) this is a UTF-8 continuation
   234  // byte.
   235  func isContinuation(b byte) bool {
   236  	// Continuation bytes have their topmost bits set to 0b10.
   237  	return b&0xc0 == 0x80
   238  }
   239  
   240  // Functions used in CGo.
   241  
   242  // Convert a Go string to a C string.
   243  func cgo_CString(s _string) unsafe.Pointer {
   244  	buf := malloc(s.length + 1)
   245  	memcpy(buf, unsafe.Pointer(s.ptr), s.length)
   246  	*(*byte)(unsafe.Add(buf, s.length)) = 0 // trailing 0 byte
   247  	return buf
   248  }
   249  
   250  // Convert a C string to a Go string.
   251  func cgo_GoString(cstr unsafe.Pointer) _string {
   252  	if cstr == nil {
   253  		return _string{}
   254  	}
   255  	return makeGoString(cstr, strlen(cstr))
   256  }
   257  
   258  // Convert a C data buffer to a Go string (that possibly contains 0 bytes).
   259  func cgo_GoStringN(cstr unsafe.Pointer, length uintptr) _string {
   260  	return makeGoString(cstr, length)
   261  }
   262  
   263  // Make a Go string given a source buffer and a length.
   264  func makeGoString(cstr unsafe.Pointer, length uintptr) _string {
   265  	s := _string{
   266  		length: length,
   267  	}
   268  	if s.length != 0 {
   269  		buf := make([]byte, s.length)
   270  		s.ptr = &buf[0]
   271  		memcpy(unsafe.Pointer(s.ptr), cstr, s.length)
   272  	}
   273  	return s
   274  }
   275  
   276  // Convert a C data buffer to a Go byte slice.
   277  func cgo_GoBytes(ptr unsafe.Pointer, length uintptr) []byte {
   278  	// Note: don't return nil if length is 0, to match the behavior of C.GoBytes
   279  	// of upstream Go.
   280  	buf := make([]byte, length)
   281  	if length != 0 {
   282  		memcpy(unsafe.Pointer(&buf[0]), ptr, uintptr(length))
   283  	}
   284  	return buf
   285  }