github.com/primecitizens/pcz/std@v0.2.1/text/unicode/utf8/encode.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2009 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package utf8
     9  
    10  import (
    11  	"github.com/primecitizens/pcz/std/core/assert"
    12  	. "github.com/primecitizens/pcz/std/text/unicode/common"
    13  )
    14  
    15  // RuneLen returns the number of bytes required to encode the rune.
    16  //
    17  // It returns 0 if the rune is not a valid value to encode in UTF-8.
    18  func RuneLen(r rune) int {
    19  	if r >= 0 && r <= Rune1Max {
    20  		return 1
    21  	}
    22  
    23  	return RuneLenNonASCII(r)
    24  }
    25  
    26  func RuneLenNonASCII(r rune) int {
    27  	switch {
    28  	case r < 0:
    29  	case r <= Rune2Max:
    30  		return 2
    31  	case SurrogateMin <= r && r <= SurrogateMax:
    32  	case r <= Rune3Max:
    33  		return 3
    34  	case r <= MaxRune:
    35  		return 4
    36  	}
    37  	return 0
    38  }
    39  
    40  // EncodeRune writes into dst the UTF-8 encoding of the rune.
    41  //
    42  // If the rune is out of range, it writes the encoding of RuneError.
    43  //
    44  // It returns the number of bytes written, and when dst is too small to
    45  // write the complete byte sequence of the rune, it returns 0.
    46  func EncodeRune(dst []byte, r rune) ([]byte, int) {
    47  	if len(dst) == cap(dst) {
    48  		return dst, 0
    49  	} else if uint32(r) <= Rune1Max {
    50  		return append(dst, byte(r)), 1
    51  	}
    52  
    53  	return EncodeRuneNonASCII(dst, r)
    54  }
    55  
    56  func EncodeRuneNonASCII(dst []byte, r rune) ([]byte, int) {
    57  	// Negative values are erroneous. Making it unsigned addresses the problem.
    58  	switch u := uint32(r); {
    59  	case u <= Rune2Max:
    60  		return append(dst, t2|byte(r>>6), tx|byte(r)&maskx), 2
    61  	case u > MaxRune,
    62  		SurrogateMin <= u && u <= SurrogateMax:
    63  		r = RuneError
    64  		fallthrough
    65  	case u <= Rune3Max:
    66  		return append(dst, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx), 3
    67  	default:
    68  		return append(dst, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx), 4
    69  	}
    70  }
    71  
    72  // AppendRunes appends the UTF-8 encoding of r to the end of p and
    73  // returns the extended buffer. If the rune is out of range,
    74  // it appends the encoding of RuneError.
    75  func AppendRunes(dst []byte, src ...rune) []byte {
    76  	var n int
    77  	for _, r := range src {
    78  		if dst, n = EncodeRune(dst, r); n == 0 {
    79  			assert.TODO("grow")
    80  		}
    81  	}
    82  
    83  	return dst
    84  }