github.com/vanus-labs/vanus/lib@v0.0.0-20231221070800-1334a7b9605e/bytes/escape.go (about)

     1  // Copyright 2023 Linkall Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bytes
    16  
    17  import (
    18  	// standard libraries.
    19  	"errors"
    20  	"io"
    21  	"unicode/utf8"
    22  )
    23  
// Sentinel errors returned by the escape decoding helpers in this file.
// Each one names the kind of escape sequence that could not be read or
// validated: a generic escape character, a \uNNNN unicode escape, a \xNN
// hexadecimal escape, or a \NNN octal escape.
var (
	errInvalidEscapeChar  = errors.New("invalid escape character")
	errInvalidUnicodeChar = errors.New("invalid unicode character")
	errInvalidHexChar     = errors.New("invalid hexadecimal character")
	errInvalidOctChar     = errors.New("invalid octal character")
)
    30  
// Inclusive bounds of the UTF-16 surrogate ranges used when decoding \uNNNN
// escapes: a high surrogate (0xD800-0xDBFF) must be followed by a low
// surrogate (0xDC00-0xDFFF) to form one code point above U+FFFF.
const (
	highSurrogateMin = 0xD800
	highSurrogateMax = 0xDBFF
	lowSurrogateMin  = 0xDC00
	lowSurrogateMax  = 0xDFFF
)
    37  
    38  var (
    39  	octBitmap [256]bool
    40  	hexBitmap [256]bool
    41  	hexToByte [256]byte
    42  	hexToRune [256]rune
    43  )
    44  
    45  func init() { //nolint:gochecknoinits // init constant table
    46  	for i := '0'; i <= '7'; i++ {
    47  		octBitmap[i] = true
    48  	}
    49  
    50  	for i := '0'; i <= '9'; i++ {
    51  		hexBitmap[i] = true
    52  		hexToRune[i] = i - '0'
    53  		hexToByte[i] = byte(hexToRune[i])
    54  	}
    55  	shift := 'a' - 'A'
    56  	for i := 'A'; i <= 'F'; i++ {
    57  		hexBitmap[i] = true
    58  		hexBitmap[i+shift] = true
    59  		hexToRune[i] = i - 'A' + 10 //nolint:gomnd // base number
    60  		hexToRune[i+shift] = hexToRune[i]
    61  		hexToByte[i] = byte(hexToRune[i])
    62  		hexToByte[i+shift] = hexToByte[i]
    63  	}
    64  }
    65  
    66  func ConsumeEscaped(r io.ByteReader, w io.ByteWriter, plan string) error {
    67  	c, err := r.ReadByte()
    68  	if err != nil {
    69  		return errInvalidEscapeChar
    70  	}
    71  	return consumeEscapedExt(c, r, w, plan)
    72  }
    73  
    74  func consumeEscapedExt(c byte, r io.ByteReader, w io.ByteWriter, plan string) error {
    75  	p := UnsafeAt(plan, int(c))
    76  	switch p {
    77  	case '.':
    78  		return errInvalidEscapeChar
    79  	case 's': // Self
    80  		return w.WriteByte(c)
    81  	case 'u': // \uNNNN
    82  		ru, err := ExpectUnicodeChar(r)
    83  		if err != nil {
    84  			return err
    85  		}
    86  		return WriteRune(w, ru)
    87  	case 'x': // \xNN
    88  		cc, err := ExpectHexChar(r)
    89  		if err != nil {
    90  			return err
    91  		}
    92  		return w.WriteByte(cc)
    93  	case 'o': // \NNN
    94  		cc, err := ExpectOctCharExt(c, r)
    95  		if err != nil {
    96  			return err
    97  		}
    98  		return w.WriteByte(cc)
    99  	default:
   100  		return w.WriteByte(p)
   101  	}
   102  }
   103  
   104  func ExpectUnicodeChar(r io.ByteReader) (rune, error) {
   105  	hi, err := expectUnicodeSurrogate(r)
   106  	if err != nil {
   107  		return utf8.RuneError, errInvalidUnicodeChar
   108  	}
   109  
   110  	// non-surrogate
   111  	if hi < highSurrogateMin || hi > lowSurrogateMax {
   112  		return hi, nil
   113  	}
   114  
   115  	// error of high-surrogate
   116  	if hi > highSurrogateMax {
   117  		return utf8.RuneError, errInvalidUnicodeChar
   118  	}
   119  
   120  	if ExpectChar(r, '\\') != nil || ExpectChar(r, 'u') != nil {
   121  		return utf8.RuneError, errInvalidUnicodeChar
   122  	}
   123  
   124  	lo, err := expectUnicodeSurrogate(r)
   125  	if err != nil {
   126  		return utf8.RuneError, errInvalidUnicodeChar
   127  	}
   128  
   129  	// error of low-surrogate
   130  	if lowSurrogateMin < 0xDC00 || lo > lowSurrogateMax {
   131  		return utf8.RuneError, errInvalidUnicodeChar
   132  	}
   133  
   134  	return 0x10000 + (hi-highSurrogateMin)<<10 + (lo - lowSurrogateMin), nil
   135  }
   136  
   137  func expectUnicodeSurrogate(r io.ByteReader) (rune, error) {
   138  	b0, err := r.ReadByte()
   139  	if err != nil || !hexBitmap[b0] {
   140  		return 0, errInvalidUnicodeChar
   141  	}
   142  	b1, err := r.ReadByte()
   143  	if err != nil || !hexBitmap[b1] {
   144  		return 0, errInvalidUnicodeChar
   145  	}
   146  	b2, err := r.ReadByte()
   147  	if err != nil || !hexBitmap[b2] {
   148  		return 0, errInvalidUnicodeChar
   149  	}
   150  	b3, err := r.ReadByte()
   151  	if err != nil || !hexBitmap[b3] {
   152  		return 0, errInvalidUnicodeChar
   153  	}
   154  	ru := hexToRune[b0]*0x1000 + hexToRune[b1]*0x100 + hexToRune[b2]*0x10 + hexToRune[b3]
   155  	return ru, nil
   156  }
   157  
   158  func ExpectHexChar(r io.ByteReader) (byte, error) {
   159  	b0, err := r.ReadByte()
   160  	if err != nil || !hexBitmap[b0] {
   161  		return 0, errInvalidHexChar
   162  	}
   163  	b1, err := r.ReadByte()
   164  	if err != nil || !hexBitmap[b1] {
   165  		return 0, errInvalidHexChar
   166  	}
   167  	return hexToByte[b0]*0x10 + hexToByte[b1], nil
   168  }
   169  
   170  func ExpectOctCharExt(b0 byte, r io.ByteReader) (byte, error) {
   171  	b1, err := r.ReadByte()
   172  	if err != nil || !octBitmap[b1] {
   173  		return 0, errInvalidOctChar
   174  	}
   175  	b2, err := r.ReadByte()
   176  	if err != nil || !octBitmap[b2] {
   177  		return 0, errInvalidOctChar
   178  	}
   179  	return (b0-'0')*0o100 + (b1-'0')*0o10 + (b2-'0')*0o1, nil
   180  }