github.com/primecitizens/pcz/std@v0.2.1/text/unicode/utf8/encode.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright 2023 The Prime Citizens 3 // 4 // Copyright 2009 The Go Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style 6 // license that can be found in the LICENSE file. 7 8 package utf8 9 10 import ( 11 "github.com/primecitizens/pcz/std/core/assert" 12 . "github.com/primecitizens/pcz/std/text/unicode/common" 13 ) 14 15 // RuneLen returns the number of bytes required to encode the rune. 16 // 17 // It returns 0 if the rune is not a valid value to encode in UTF-8. 18 func RuneLen(r rune) int { 19 if r >= 0 && r <= Rune1Max { 20 return 1 21 } 22 23 return RuneLenNonASCII(r) 24 } 25 26 func RuneLenNonASCII(r rune) int { 27 switch { 28 case r < 0: 29 case r <= Rune2Max: 30 return 2 31 case SurrogateMin <= r && r <= SurrogateMax: 32 case r <= Rune3Max: 33 return 3 34 case r <= MaxRune: 35 return 4 36 } 37 return 0 38 } 39 40 // EncodeRune writes into dst the UTF-8 encoding of the rune. 41 // 42 // If the rune is out of range, it writes the encoding of RuneError. 43 // 44 // It returns the number of bytes written, and when dst is too small to 45 // write the complete byte sequence of the rune, it returns 0. 46 func EncodeRune(dst []byte, r rune) ([]byte, int) { 47 if len(dst) == cap(dst) { 48 return dst, 0 49 } else if uint32(r) <= Rune1Max { 50 return append(dst, byte(r)), 1 51 } 52 53 return EncodeRuneNonASCII(dst, r) 54 } 55 56 func EncodeRuneNonASCII(dst []byte, r rune) ([]byte, int) { 57 // Negative values are erroneous. Making it unsigned addresses the problem. 58 switch u := uint32(r); { 59 case u <= Rune2Max: 60 return append(dst, t2|byte(r>>6), tx|byte(r)&maskx), 2 61 case u > MaxRune, 62 SurrogateMin <= u && u <= SurrogateMax: 63 r = RuneError 64 fallthrough 65 case u <= Rune3Max: 66 return append(dst, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx), 3 67 default: 68 return append(dst, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx), 4 69 } 70 } 71 72 // AppendRunes appends the UTF-8 encoding of r to the end of p and 73 // returns the extended buffer. If the rune is out of range, 74 // it appends the encoding of RuneError. 75 func AppendRunes(dst []byte, src ...rune) []byte { 76 var n int 77 for _, r := range src { 78 if dst, n = EncodeRune(dst, r); n == 0 { 79 assert.TODO("grow") 80 } 81 } 82 83 return dst 84 }