github.com/kamalshkeir/kencoding@v0.0.2-0.20230409043843-44b609a0475a/json/string.go (about)

     1  package json
     2  
     3  import (
     4  	"math/bits"
     5  	"unsafe"
     6  )
     7  
     8  const (
     9  	lsb = 0x0101010101010101
    10  	msb = 0x8080808080808080
    11  )
    12  
    13  // escapeIndex finds the index of the first char in `s` that requires escaping.
    14  // A char requires escaping if it's outside of the range of [0x20, 0x7F] or if
    15  // it includes a double quote or backslash. If the escapeHTML mode is enabled,
    16  // the chars <, > and & also require escaping. If no chars in `s` require
    17  // escaping, the return value is -1.
    18  func escapeIndex(s string, escapeHTML bool) int {
    19  	chunks := stringToUint64(s)
    20  	for _, n := range chunks {
    21  		// combine masks before checking for the MSB of each byte. We include
    22  		// `n` in the mask to check whether any of the *input* byte MSBs were
    23  		// set (i.e. the byte was outside the ASCII range).
    24  		mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\')
    25  		if escapeHTML {
    26  			mask |= contains(n, '<') | contains(n, '>') | contains(n, '&')
    27  		}
    28  		if (mask & msb) != 0 {
    29  			return bits.TrailingZeros64(mask&msb) / 8
    30  		}
    31  	}
    32  
    33  	for i := len(chunks) * 8; i < len(s); i++ {
    34  		c := s[i]
    35  		if c < 0x20 || c > 0x7f || c == '"' || c == '\\' || (escapeHTML && (c == '<' || c == '>' || c == '&')) {
    36  			return i
    37  		}
    38  	}
    39  
    40  	return -1
    41  }
    42  
    43  // below return a mask that can be used to determine if any of the bytes
    44  // in `n` are below `b`. If a byte's MSB is set in the mask then that byte was
    45  // below `b`. The result is only valid if `b`, and each byte in `n`, is below
    46  // 0x80.
    47  func below(n uint64, b byte) uint64 {
    48  	return n - expand(b)
    49  }
    50  
    51  // contains returns a mask that can be used to determine if any of the
    52  // bytes in `n` are equal to `b`. If a byte's MSB is set in the mask then
    53  // that byte is equal to `b`. The result is only valid if `b`, and each
    54  // byte in `n`, is below 0x80.
    55  func contains(n uint64, b byte) uint64 {
    56  	return (n ^ expand(b)) - lsb
    57  }
    58  
    59  // expand puts the specified byte into each of the 8 bytes of a uint64.
    60  func expand(b byte) uint64 {
    61  	return lsb * uint64(b)
    62  }
    63  
    64  func stringToUint64(s string) []uint64 {
    65  	return *(*[]uint64)(unsafe.Pointer(&sliceHeader{
    66  		Data: *(*unsafe.Pointer)(unsafe.Pointer(&s)),
    67  		Len:  len(s) / 8,
    68  		Cap:  len(s) / 8,
    69  	}))
    70  }